diff --git a/.gitignore b/.gitignore
index 228365d9e..039d2400a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,6 @@ Thumbs.db
 tags
 tegra/
 bin/
-CMakeFiles/
 *.sdf
 *.opensdf
 *.obj
@@ -17,3 +16,9 @@ CMakeFiles/
 *.depend
 *.rule
 *.tmp
+*/debug
+*/CMakeFiles
+CMakeCache.txt
+*.suo
+*.log
+*.tlog
\ No newline at end of file
diff --git a/3rdparty/libjasper/jas_cm.c b/3rdparty/libjasper/jas_cm.c
index dc23ead89..16d4a502d 100644
--- a/3rdparty/libjasper/jas_cm.c
+++ b/3rdparty/libjasper/jas_cm.c
@@ -842,7 +842,6 @@ static int jas_cmshapmat_apply(jas_cmpxform_t *pxform, jas_cmreal_t *in,
                 *dst++ = a2;
             }
         } else {
-assert(0);
             while (--cnt >= 0) {
                 a0 = *src++;
                 src++;
diff --git a/3rdparty/libjasper/jas_stream.c b/3rdparty/libjasper/jas_stream.c
index ca1239c7d..3ba7a837d 100644
--- a/3rdparty/libjasper/jas_stream.c
+++ b/3rdparty/libjasper/jas_stream.c
@@ -345,6 +345,7 @@ jas_stream_t *jas_stream_tmpfile()
 {
     jas_stream_t *stream;
     jas_stream_fileobj_t *obj;
+    char *tmpname;
 
     if (!(stream = jas_stream_create())) {
         return 0;
@@ -365,10 +366,17 @@ jas_stream_t *jas_stream_tmpfile()
 
 #ifdef _WIN32
     /* Choose a file name. */
-    tmpnam(obj->pathname);
+    tmpname = tempnam(NULL, NULL);
+    if (!tmpname) {
+        /* tempnam() returns NULL on failure; bail out like a failed open. */
+        jas_stream_destroy(stream);
+        return 0;
+    }
+    strcpy(obj->pathname, tmpname);
+    free(tmpname);
 
     /* Open the underlying file. */
-    if ((obj->fd = open(obj->pathname, O_CREAT | O_EXCL | O_RDWR | O_TRUNC | O_BINARY,
+    if ((obj->fd = open(obj->pathname, O_CREAT | O_EXCL | O_RDWR | O_TRUNC | O_BINARY | O_TEMPORARY | _O_SHORT_LIVED,
       JAS_STREAM_PERMS)) < 0) {
         jas_stream_destroy(stream);
         return 0;
diff --git a/3rdparty/libjpeg/CMakeLists.txt b/3rdparty/libjpeg/CMakeLists.txt
index 65a9d1c8a..d79f00ada 100644
--- a/3rdparty/libjpeg/CMakeLists.txt
+++ b/3rdparty/libjpeg/CMakeLists.txt
@@ -15,6 +15,13 @@ else()
   ocv_list_filterout(lib_srcs jmemnobs.c)
 endif()
 
+if(WINRT)
+    add_definitions(-DNO_GETENV)
+    get_directory_property( DirDefs COMPILE_DEFINITIONS )
+    message(STATUS "Adding NO_GETENV to compiler definitions for WINRT:")
+    message(STATUS "   COMPILE_DEFINITIONS = ${DirDefs}")
+endif()
+
 # ----------------------------------------------------------------------------------
 #         Define the library target:
 # ----------------------------------------------------------------------------------
diff --git a/3rdparty/libtiff/CMakeLists.txt b/3rdparty/libtiff/CMakeLists.txt
index ad8a46618..b7739e0e4 100644
--- a/3rdparty/libtiff/CMakeLists.txt
+++ b/3rdparty/libtiff/CMakeLists.txt
@@ -17,7 +17,7 @@ check_include_file(string.h HAVE_STRING_H)
 check_include_file(sys/types.h HAVE_SYS_TYPES_H)
 check_include_file(unistd.h HAVE_UNISTD_H)
 
-if(WIN32)
+if(WIN32 AND NOT WINRT)
   set(USE_WIN32_FILEIO 1)
 endif()
 
@@ -79,7 +79,7 @@ set(lib_srcs
     "${CMAKE_CURRENT_BINARY_DIR}/tif_config.h"
     )
 
-if(WIN32)
+if(WIN32 AND NOT WINRT)
   list(APPEND lib_srcs tif_win32.c)
 else()
   list(APPEND lib_srcs tif_unix.c)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc81d51cd..746faac20 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -244,6 +244,7 @@ OCV_OPTION(ENABLE_NOISY_WARNINGS      "Show all warnings even if they are too no
 OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors"                                 OFF )
 OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF  IF ANDROID )
 OCV_OPTION(ENABLE_IMPL_COLLECTION     "Collect implementation data on function call"             OFF )
+OCV_OPTION(GENERATE_ABI_DESCRIPTOR    "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
 
 if(ENABLE_IMPL_COLLECTION)
   add_definitions(-DCV_COLLECT_IMPL_DATA)
@@ -274,8 +275,6 @@ endif()
 
 if(ANDROID OR WIN32)
   set(OPENCV_DOC_INSTALL_PATH doc)
-elseif(INSTALL_TO_MANGLED_PATHS)
-  set(OPENCV_DOC_INSTALL_PATH share/OpenCV-${OPENCV_VERSION}/doc)
 else()
   set(OPENCV_DOC_INSTALL_PATH share/OpenCV/doc)
 endif()
@@ -309,6 +308,10 @@ if(NOT OPENCV_TEST_INSTALL_PATH)
   set(OPENCV_TEST_INSTALL_PATH "${OPENCV_BIN_INSTALL_PATH}")
 endif()
 
+if (OPENCV_TEST_DATA_PATH)
+  get_filename_component(OPENCV_TEST_DATA_PATH "${OPENCV_TEST_DATA_PATH}" ABSOLUTE)
+endif()
+
 if(OPENCV_TEST_DATA_PATH AND NOT OPENCV_TEST_DATA_INSTALL_PATH)
   if(ANDROID)
     set(OPENCV_TEST_DATA_INSTALL_PATH "sdk/etc/testdata")
@@ -327,9 +330,11 @@ if(ANDROID)
   set(OPENCV_CONFIG_INSTALL_PATH  sdk/native/jni)
   set(OPENCV_INCLUDE_INSTALL_PATH sdk/native/jni/include)
   set(OPENCV_SAMPLES_SRC_INSTALL_PATH samples/native)
+  set(OPENCV_OTHER_INSTALL_PATH   sdk/etc)
 else()
   set(LIBRARY_OUTPUT_PATH         "${OpenCV_BINARY_DIR}/lib")
   set(3P_LIBRARY_OUTPUT_PATH      "${OpenCV_BINARY_DIR}/3rdparty/lib${LIB_SUFFIX}")
+
   if(WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
     if(OpenCV_STATIC)
       set(OPENCV_LIB_INSTALL_PATH   "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib${LIB_SUFFIX}")
@@ -338,10 +343,14 @@ else()
     endif()
     set(OPENCV_3P_LIB_INSTALL_PATH  "${OpenCV_INSTALL_BINARIES_PREFIX}staticlib${LIB_SUFFIX}")
     set(OPENCV_SAMPLES_SRC_INSTALL_PATH    samples/native)
+    set(OPENCV_JAR_INSTALL_PATH java)
+    set(OPENCV_OTHER_INSTALL_PATH   etc)
   else()
     set(OPENCV_LIB_INSTALL_PATH     lib${LIB_SUFFIX})
     set(OPENCV_3P_LIB_INSTALL_PATH  share/OpenCV/3rdparty/${OPENCV_LIB_INSTALL_PATH})
     set(OPENCV_SAMPLES_SRC_INSTALL_PATH    share/OpenCV/samples)
+    set(OPENCV_JAR_INSTALL_PATH share/OpenCV/java)
+    set(OPENCV_OTHER_INSTALL_PATH   share/OpenCV)
   endif()
   set(OPENCV_INCLUDE_INSTALL_PATH "include")
 
@@ -358,8 +367,16 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 
 if(INSTALL_TO_MANGLED_PATHS)
   set(OPENCV_INCLUDE_INSTALL_PATH ${OPENCV_INCLUDE_INSTALL_PATH}/opencv-${OPENCV_VERSION})
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_3P_LIB_INSTALL_PATH "${OPENCV_3P_LIB_INSTALL_PATH}")
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_SAMPLES_SRC_INSTALL_PATH "${OPENCV_SAMPLES_SRC_INSTALL_PATH}")
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_CONFIG_INSTALL_PATH "${OPENCV_CONFIG_INSTALL_PATH}")
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_DOC_INSTALL_PATH "${OPENCV_DOC_INSTALL_PATH}")
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_JAR_INSTALL_PATH "${OPENCV_JAR_INSTALL_PATH}")
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_TEST_DATA_INSTALL_PATH "${OPENCV_TEST_DATA_INSTALL_PATH}")
+  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OPENCV_OTHER_INSTALL_PATH "${OPENCV_OTHER_INSTALL_PATH}")
 endif()
 
+
 if(WIN32)
   # Postfix of DLLs:
   set(OPENCV_DLLVERSION "${OPENCV_VERSION_MAJOR}${OPENCV_VERSION_MINOR}${OPENCV_VERSION_PATCH}")
@@ -623,6 +640,9 @@ include(cmake/OpenCVGenConfig.cmake)
 # Generate Info.plist for the IOS framework
 include(cmake/OpenCVGenInfoPlist.cmake)
 
+# Generate ABI descriptor
+include(cmake/OpenCVGenABI.cmake)
+
 # Generate environment setup file
 if(INSTALL_TESTS AND OPENCV_TEST_DATA_PATH)
   if(ANDROID)
@@ -963,8 +983,9 @@ if(DEFINED WITH_V4L)
   else()
     set(HAVE_CAMV4L2_STR "NO")
   endif()
-  status("    V4L/V4L2:"       HAVE_LIBV4L         THEN "Using libv4l (ver ${ALIASOF_libv4l1_VERSION})"
-                                                   ELSE "${HAVE_CAMV4L_STR}/${HAVE_CAMV4L2_STR}")
+  status("    V4L/V4L2:"       HAVE_LIBV4L
+             THEN "Using libv4l1 (ver ${ALIASOF_libv4l1_VERSION}) / libv4l2 (ver ${ALIASOF_libv4l2_VERSION})"
+             ELSE "${HAVE_CAMV4L_STR}/${HAVE_CAMV4L2_STR}")
 endif(DEFINED WITH_V4L)
 
 if(DEFINED WITH_DSHOW)
diff --git a/apps/traincascade/cascadeclassifier.cpp b/apps/traincascade/cascadeclassifier.cpp
index c9b524f5e..8b3eb57ac 100644
--- a/apps/traincascade/cascadeclassifier.cpp
+++ b/apps/traincascade/cascadeclassifier.cpp
@@ -135,7 +135,8 @@ bool CvCascadeClassifier::train( const string _cascadeDirName,
                                 const CvCascadeParams& _cascadeParams,
                                 const CvFeatureParams& _featureParams,
                                 const CvCascadeBoostParams& _stageParams,
-                                bool baseFormatSave )
+                                bool baseFormatSave,
+                                double acceptanceRatioBreakValue )
 {
     // Start recording clock ticks for training time output
     const clock_t begin_time = clock();
@@ -185,6 +186,7 @@ bool CvCascadeClassifier::train( const string _cascadeDirName,
     cout << "numStages: " << numStages << endl;
     cout << "precalcValBufSize[Mb] : " << _precalcValBufSize << endl;
     cout << "precalcIdxBufSize[Mb] : " << _precalcIdxBufSize << endl;
+    cout << "acceptanceRatioBreakValue : " << acceptanceRatioBreakValue << endl;
     cascadeParams.printAttrs();
     stageParams->printAttrs();
     featureParams->printAttrs();
@@ -207,13 +209,18 @@ bool CvCascadeClassifier::train( const string _cascadeDirName,
         if ( !updateTrainingSet( tempLeafFARate ) )
         {
             cout << "Train dataset for temp stage can not be filled. "
-                "Branch training terminated." << endl;
+                    "Branch training terminated." << endl;
             break;
         }
         if( tempLeafFARate <= requiredLeafFARate )
         {
             cout << "Required leaf false alarm rate achieved. "
-                 "Branch training terminated." << endl;
+                    "Branch training terminated." << endl;
+            break;
+        }
+        if( (tempLeafFARate <= acceptanceRatioBreakValue) && (acceptanceRatioBreakValue >= 0) ){
+            cout << "The required acceptanceRatio for the model has been reached to avoid overfitting of trainingdata. "
+                    "Branch training terminated." << endl;
             break;
         }
 
diff --git a/apps/traincascade/cascadeclassifier.h b/apps/traincascade/cascadeclassifier.h
index 6d6cb5b3f..d8e044828 100644
--- a/apps/traincascade/cascadeclassifier.h
+++ b/apps/traincascade/cascadeclassifier.h
@@ -94,7 +94,8 @@ public:
                 const CvCascadeParams& _cascadeParams,
                 const CvFeatureParams& _featureParams,
                 const CvCascadeBoostParams& _stageParams,
-                bool baseFormatSave = false );
+                bool baseFormatSave = false,
+                double acceptanceRatioBreakValue = -1.0 );
 private:
     int predict( int sampleIdx );
     void save( const std::string cascadeDirName, bool baseFormat = false );
diff --git a/apps/traincascade/imagestorage.cpp b/apps/traincascade/imagestorage.cpp
index 7a004c610..ef33c89f1 100644
--- a/apps/traincascade/imagestorage.cpp
+++ b/apps/traincascade/imagestorage.cpp
@@ -33,20 +33,12 @@ bool CvCascadeImageReader::NegReader::create( const string _filename, Size _winS
     if ( !file.is_open() )
         return false;
 
-    size_t pos = _filename.rfind('\\');
-    char dlmrt = '\\';
-    if (pos == string::npos)
-    {
-        pos = _filename.rfind('/');
-        dlmrt = '/';
-    }
-    dirname = pos == string::npos ? "" : _filename.substr(0, pos) + dlmrt;
     while( !file.eof() )
     {
         std::getline(file, str);
         if (str.empty()) break;
         if (str.at(0) == '#' ) continue; /* comment */
-        imgFilenames.push_back(dirname + str);
+        imgFilenames.push_back(str);
     }
     file.close();
 
diff --git a/apps/traincascade/traincascade.cpp b/apps/traincascade/traincascade.cpp
index d1c3e4e87..745e3054b 100644
--- a/apps/traincascade/traincascade.cpp
+++ b/apps/traincascade/traincascade.cpp
@@ -12,9 +12,10 @@ int main( int argc, char* argv[] )
     int numNeg    = 1000;
     int numStages = 20;
     int numThreads = getNumThreads();
-    int precalcValBufSize = 256,
-        precalcIdxBufSize = 256;
+    int precalcValBufSize = 1024,
+        precalcIdxBufSize = 1024;
     bool baseFormatSave = false;
+    double acceptanceRatioBreakValue = -1.0;
 
     CvCascadeParams cascadeParams;
     CvCascadeBoostParams stageParams;
@@ -36,6 +37,7 @@ int main( int argc, char* argv[] )
         cout << "  [-precalcIdxBufSize <precalculated_idxs_buffer_size_in_Mb = " << precalcIdxBufSize << ">]" << endl;
         cout << "  [-baseFormatSave]" << endl;
         cout << "  [-numThreads <max_number_of_threads = " << numThreads << ">]" << endl;
+        cout << "  [-acceptanceRatioBreakValue <value = " << acceptanceRatioBreakValue << ">]" << endl;
         cascadeParams.printDefaults();
         stageParams.printDefaults();
         for( int fi = 0; fi < fc; fi++ )
@@ -86,6 +88,10 @@ int main( int argc, char* argv[] )
         {
           numThreads = atoi(argv[++i]);
         }
+        else if( !strcmp( argv[i], "-acceptanceRatioBreakValue" ) )
+        {
+          acceptanceRatioBreakValue = atof(argv[++i]);
+        }
         else if ( cascadeParams.scanAttr( argv[i], argv[i+1] ) ) { i++; }
         else if ( stageParams.scanAttr( argv[i], argv[i+1] ) ) { i++; }
         else if ( !set )
@@ -112,6 +118,7 @@ int main( int argc, char* argv[] )
                       cascadeParams,
                       *featureParams[cascadeParams.featureType],
                       stageParams,
-                      baseFormatSave );
+                      baseFormatSave,
+                      acceptanceRatioBreakValue );
     return 0;
 }
diff --git a/cmake/OpenCVConfig.cmake b/cmake/OpenCVConfig.cmake
index dfd7e8f26..09174b02f 100644
--- a/cmake/OpenCVConfig.cmake
+++ b/cmake/OpenCVConfig.cmake
@@ -47,7 +47,7 @@ endif()
 
 if(NOT DEFINED OpenCV_STATIC)
   # look for global setting
-  if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS)
+  if(BUILD_SHARED_LIBS)
     set(OpenCV_STATIC OFF)
   else()
     set(OpenCV_STATIC ON)
@@ -89,7 +89,7 @@ elseif(MINGW)
   execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
                   OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
                   OUTPUT_STRIP_TRAILING_WHITESPACE)
-  if(CMAKE_OPENCV_GCC_TARGET_MACHINE MATCHES "64")
+  if(OPENCV_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64")
     set(MINGW64 1)
     set(OpenCV_ARCH x64)
   else()
diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake
index 72d939917..871331883 100644
--- a/cmake/OpenCVDetectCXXCompiler.cmake
+++ b/cmake/OpenCVDetectCXXCompiler.cmake
@@ -91,9 +91,9 @@ elseif(CMAKE_COMPILER_IS_GNUCXX)
 
   if(WIN32)
     execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
-              OUTPUT_VARIABLE CMAKE_OPENCV_GCC_TARGET_MACHINE
+              OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
               OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if(CMAKE_OPENCV_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64")
+    if(OPENCV_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64")
       set(MINGW64 1)
     endif()
   endif()
@@ -147,11 +147,7 @@ if(MSVC)
 elseif(MINGW)
   set(OpenCV_RUNTIME mingw)
 
-  execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
-                  OUTPUT_VARIABLE OPENCV_GCC_TARGET_MACHINE
-                  OUTPUT_STRIP_TRAILING_WHITESPACE)
-  if(CMAKE_OPENCV_GCC_TARGET_MACHINE MATCHES "64")
-    set(MINGW64 1)
+  if(MINGW64)
     set(OpenCV_ARCH x64)
   else()
     set(OpenCV_ARCH x86)
diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake
index 8ed425fb5..ed7569821 100644
--- a/cmake/OpenCVDetectPython.cmake
+++ b/cmake/OpenCVDetectPython.cmake
@@ -75,10 +75,10 @@ function(find_python preferred_version min_version library_env include_dir_env
 
     if(NOT ANDROID AND NOT IOS)
       ocv_check_environment_variables(${library_env} ${include_dir_env})
-      if(${${library_env}})
+      if(NOT "${${library_env}}" STREQUAL "")
           set(PYTHON_LIBRARY "${${library_env}}")
       endif()
-      if(${${include_dir_env}})
+      if(NOT "${${include_dir_env}}" STREQUAL "")
           set(PYTHON_INCLUDE_DIR "${${include_dir_env}}")
       endif()
 
diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake
index be394857c..1443c62ca 100644
--- a/cmake/OpenCVFindLibsVideo.cmake
+++ b/cmake/OpenCVFindLibsVideo.cmake
@@ -153,7 +153,13 @@ endif(WITH_XINE)
 ocv_clear_vars(HAVE_LIBV4L HAVE_CAMV4L HAVE_CAMV4L2 HAVE_VIDEOIO)
 if(WITH_V4L)
   if(WITH_LIBV4L)
-    CHECK_MODULE(libv4l1 HAVE_LIBV4L)
+    CHECK_MODULE(libv4l1 HAVE_LIBV4L1)
+    CHECK_MODULE(libv4l2 HAVE_LIBV4L2)
+    if(HAVE_LIBV4L1 AND HAVE_LIBV4L2)
+      set(HAVE_LIBV4L YES)
+    else()
+      set(HAVE_LIBV4L NO)
+    endif()
   endif()
   CHECK_INCLUDE_FILE(linux/videodev.h HAVE_CAMV4L)
   CHECK_INCLUDE_FILE(linux/videodev2.h HAVE_CAMV4L2)
@@ -262,7 +268,9 @@ endif(WITH_MSMF)
 # --- Extra HighGUI and VideoIO libs on Windows ---
 if(WIN32)
   list(APPEND HIGHGUI_LIBRARIES comctl32 gdi32 ole32 setupapi ws2_32)
-  list(APPEND VIDEOIO_LIBRARIES vfw32)
+  if(HAVE_VFW)
+    list(APPEND VIDEOIO_LIBRARIES vfw32)
+  endif()
   if(MINGW64)
     list(APPEND VIDEOIO_LIBRARIES avifil32 avicap32 winmm msvfw32)
     list(REMOVE_ITEM VIDEOIO_LIBRARIES vfw32)
diff --git a/cmake/OpenCVGenABI.cmake b/cmake/OpenCVGenABI.cmake
new file mode 100644
index 000000000..35cc10d8e
--- /dev/null
+++ b/cmake/OpenCVGenABI.cmake
@@ -0,0 +1,49 @@
+if (NOT GENERATE_ABI_DESCRIPTOR)
+  return()
+endif()
+
+set(filename "opencv_abi.xml")
+set(path1 "${CMAKE_BINARY_DIR}/${filename}")
+
+set(modules "${OPENCV_MODULES_PUBLIC}")
+ocv_list_filterout(modules "opencv_ts")
+
+message(STATUS "Generating ABI compliance checker configuration: ${filename}")
+
+if (OPENCV_VCSVERSION AND NOT OPENCV_VCSVERSION STREQUAL "unknown")
+  set(OPENCV_ABI_VERSION "${OPENCV_VCSVERSION}")
+else()
+  set(OPENCV_ABI_VERSION "${OPENCV_VERSION}")
+endif()
+
+# Headers
+set(OPENCV_ABI_HEADERS "{RELPATH}/${OPENCV_INCLUDE_INSTALL_PATH}")
+
+# Libraries
+set(OPENCV_ABI_LIBRARIES "{RELPATH}/${OPENCV_LIB_INSTALL_PATH}")
+
+set(OPENCV_ABI_SKIP_HEADERS "")
+set(OPENCV_ABI_SKIP_LIBRARIES "")
+foreach(mod ${OPENCV_MODULES_BUILD})
+  string(REGEX REPLACE "^opencv_" "" mod "${mod}")
+  if(NOT "${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL "${OpenCV_SOURCE_DIR}/modules/${mod}")
+    # headers
+    foreach(h ${OPENCV_MODULE_opencv_${mod}_HEADERS})
+      file(RELATIVE_PATH h "${OPENCV_MODULE_opencv_${mod}_LOCATION}/include" "${h}")
+      list(APPEND OPENCV_ABI_SKIP_HEADERS "${h}")
+    endforeach()
+    # libraries
+    set(lib_name "")
+    get_target_property(lib_name opencv_${mod} LOCATION)
+    get_filename_component(lib_name "${lib_name}" NAME)
+    list(APPEND OPENCV_ABI_SKIP_LIBRARIES "${lib_name}")
+  endif()
+endforeach()
+string(REPLACE ";" "\n    " OPENCV_ABI_SKIP_HEADERS "${OPENCV_ABI_SKIP_HEADERS}")
+string(REPLACE ";" "\n    " OPENCV_ABI_SKIP_LIBRARIES "${OPENCV_ABI_SKIP_LIBRARIES}")
+
+# Options
+set(OPENCV_ABI_GCC_OPTIONS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
+string(REGEX REPLACE "([^ ]) +([^ ])" "\\1\\n    \\2" OPENCV_ABI_GCC_OPTIONS "${OPENCV_ABI_GCC_OPTIONS}")
+
+configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/opencv_abi.xml.in" "${path1}" @ONLY)
diff --git a/cmake/OpenCVGenConfig.cmake b/cmake/OpenCVGenConfig.cmake
index 249479d39..ae8fc8939 100644
--- a/cmake/OpenCVGenConfig.cmake
+++ b/cmake/OpenCVGenConfig.cmake
@@ -101,10 +101,7 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig-version.cmake.
 set(OpenCV_INCLUDE_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_INCLUDE_INSTALL_PATH}/opencv" "\${OpenCV_INSTALL_PATH}/${OPENCV_INCLUDE_INSTALL_PATH}\"")
 
 set(OpenCV2_INCLUDE_DIRS_CONFIGCMAKE "\"\"")
-if(INSTALL_TO_MANGLED_PATHS)
-  string(REPLACE "OpenCV" "OpenCV-${OPENCV_VERSION}" OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "${OPENCV_3P_LIB_INSTALL_PATH}")
-  set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE}\"")
-endif()
+set(OpenCV_3RDPARTY_LIB_DIRS_CONFIGCMAKE "\"\${OpenCV_INSTALL_PATH}/${OPENCV_3P_LIB_INSTALL_PATH}\"")
 
 if(UNIX) # ANDROID configuration is created here also
   #http://www.vtk.org/Wiki/CMake/Tutorials/Packaging reference
@@ -114,23 +111,13 @@ if(UNIX) # ANDROID configuration is created here also
   #                <prefix>/(share|lib)/<name>*/                           (U)
   #                <prefix>/(share|lib)/<name>*/(cmake|CMake)/             (U)
   if(USE_IPPICV)
-    if(INSTALL_TO_MANGLED_PATHS)
-      file(RELATIVE_PATH INSTALL_PATH_RELATIVE_IPPICV "${CMAKE_INSTALL_PREFIX}/${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/" ${IPPICV_INSTALL_PATH})
-    else()
-      file(RELATIVE_PATH INSTALL_PATH_RELATIVE_IPPICV "${CMAKE_INSTALL_PREFIX}/${OPENCV_CONFIG_INSTALL_PATH}/" ${IPPICV_INSTALL_PATH})
-    endif()
+    file(RELATIVE_PATH INSTALL_PATH_RELATIVE_IPPICV "${CMAKE_INSTALL_PREFIX}/${OPENCV_CONFIG_INSTALL_PATH}/" ${IPPICV_INSTALL_PATH})
   endif()
   configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig.cmake.in" "${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig.cmake" @ONLY)
   configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/OpenCVConfig-version.cmake.in" "${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig-version.cmake" @ONLY)
-  if(INSTALL_TO_MANGLED_PATHS)
-    install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/ COMPONENT dev)
-    install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig-version.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/ COMPONENT dev)
-    install(EXPORT OpenCVModules DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}-${OPENCV_VERSION}/ FILE OpenCVModules${modules_file_suffix}.cmake COMPONENT dev)
-  else()
-    install(FILES "${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/ COMPONENT dev)
-    install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig-version.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/ COMPONENT dev)
-    install(EXPORT OpenCVModules DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/ FILE OpenCVModules${modules_file_suffix}.cmake COMPONENT dev)
-  endif()
+  install(FILES "${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/ COMPONENT dev)
+  install(FILES ${CMAKE_BINARY_DIR}/unix-install/OpenCVConfig-version.cmake DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/ COMPONENT dev)
+  install(EXPORT OpenCVModules DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/ FILE OpenCVModules${modules_file_suffix}.cmake COMPONENT dev)
 endif()
 
 if(ANDROID)
diff --git a/cmake/checks/winrttest.cpp b/cmake/checks/winrttest.cpp
deleted file mode 100644
index 9ec0c9ac1..000000000
--- a/cmake/checks/winrttest.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <wrl/client.h>
-
-int main(int, char**)
-{
-    return 0;
-}
diff --git a/cmake/templates/opencv_abi.xml.in b/cmake/templates/opencv_abi.xml.in
new file mode 100644
index 000000000..6a7a6d8d7
--- /dev/null
+++ b/cmake/templates/opencv_abi.xml.in
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="utf-8"?>
+
+<!--
+
+    This file is auto-generated
+
+-->
+
+<descriptor>
+
+<version>
+    @OPENCV_ABI_VERSION@
+</version>
+
+<headers>
+    @OPENCV_ABI_HEADERS@
+</headers>
+
+<libs>
+    @OPENCV_ABI_LIBRARIES@
+</libs>
+
+<skip_headers>
+    opencv2/core/cuda*
+    opencv2/core/private*
+    opencv/cxeigen.hpp
+    opencv2/core/eigen.hpp
+    opencv2/flann/hdf5.h
+    opencv2/imgcodecs/ios.h
+    opencv2/videoio/cap_ios.h
+    opencv2/ts.hpp
+    opencv2/ts/*
+    opencv2/xobjdetect/private.hpp
+    @OPENCV_ABI_SKIP_HEADERS@
+</skip_headers>
+
+<skip_libs>
+    @OPENCV_ABI_SKIP_LIBRARIES@
+</skip_libs>
+
+<gcc_options>
+ @OPENCV_ABI_GCC_OPTIONS@
+</gcc_options>
+
+</descriptor>
diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt
index bc5a0361a..1f0d72008 100644
--- a/data/CMakeLists.txt
+++ b/data/CMakeLists.txt
@@ -1,14 +1,9 @@
 file(GLOB HAAR_CASCADES haarcascades/*.xml)
 file(GLOB LBP_CASCADES lbpcascades/*.xml)
 
-if(ANDROID)
-  install(FILES ${HAAR_CASCADES} DESTINATION sdk/etc/haarcascades COMPONENT libs)
-  install(FILES ${LBP_CASCADES}  DESTINATION sdk/etc/lbpcascades  COMPONENT libs)
-else()
-  install(FILES ${HAAR_CASCADES} DESTINATION share/OpenCV/haarcascades COMPONENT libs)
-  install(FILES ${LBP_CASCADES}  DESTINATION share/OpenCV/lbpcascades  COMPONENT libs)
-endif()
+install(FILES ${HAAR_CASCADES} DESTINATION ${OPENCV_OTHER_INSTALL_PATH}/haarcascades COMPONENT libs)
+install(FILES ${LBP_CASCADES}  DESTINATION ${OPENCV_OTHER_INSTALL_PATH}/lbpcascades  COMPONENT libs)
 
 if(INSTALL_TESTS AND OPENCV_TEST_DATA_PATH)
   install(DIRECTORY "${OPENCV_TEST_DATA_PATH}/" DESTINATION "${OPENCV_TEST_DATA_INSTALL_PATH}" COMPONENT "tests")
-endif()
\ No newline at end of file
+endif()
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 866134fe1..a7f5372bf 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -27,15 +27,6 @@ if(HAVE_DOC_GENERATOR)
   set(FIXED_ORDER_MODULES core imgproc imgcodecs videoio highgui video calib3d features2d objdetect ml flann photo stitching)
   list(REMOVE_ITEM BASE_MODULES ${FIXED_ORDER_MODULES})
   set(BASE_MODULES ${FIXED_ORDER_MODULES} ${BASE_MODULES})
-
-  set(DOC_LIST
-      "${OpenCV_SOURCE_DIR}/doc/opencv-logo.png"
-      "${OpenCV_SOURCE_DIR}/doc/opencv-logo2.png"
-      "${OpenCV_SOURCE_DIR}/doc/opencv-logo-white.png"
-      "${OpenCV_SOURCE_DIR}/doc/opencv.ico"
-      "${OpenCV_SOURCE_DIR}/doc/pattern.png"
-      "${OpenCV_SOURCE_DIR}/doc/acircles_pattern.png")
-  set(OPTIONAL_DOC_LIST "")
 endif(HAVE_DOC_GENERATOR)
 
 # ========= Doxygen docs =========
@@ -160,18 +151,8 @@ if(BUILD_DOCS AND DOXYGEN_FOUND)
     COMMAND ${DOXYGEN_EXECUTABLE} ${doxyfile}
     DEPENDS ${doxyfile} ${rootfile} ${bibfile} ${deps}
   )
+  install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/doxygen/html
+    DESTINATION "${OPENCV_DOC_INSTALL_PATH}"
+    COMPONENT "docs" OPTIONAL
+  )
 endif()
-
-if(HAVE_DOC_GENERATOR)
-  # installation
-  foreach(f ${DOC_LIST})
-    install(FILES "${f}" DESTINATION "${OPENCV_DOC_INSTALL_PATH}" COMPONENT docs)
-  endforeach()
-  foreach(f ${OPTIONAL_DOC_LIST})
-    install(FILES "${f}" DESTINATION "${OPENCV_DOC_INSTALL_PATH}" OPTIONAL COMPONENT docs)
-  endforeach()
-
-  # dummy targets
-  add_custom_target(docs)
-  add_custom_target(html_docs)
-endif(HAVE_DOC_GENERATOR)
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index 93ccafaae..192081e9b 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -243,11 +243,7 @@ PREDEFINED             = __cplusplus=1 \
                          CV_NORETURN= \
                          CV_DEFAULT(x)=" = x" \
                          CV_NEON=1 \
-                         FLANN_DEPRECATED= \
-                         "CV_PURE_PROPERTY(type, name)= /** \@see set##name */ virtual type get##name() const = 0; /** \@copybrief get##name \@see get##name */ virtual void set##name(type val) = 0;" \
-                         "CV_IMPL_PROPERTY(type, name, x)= /** \@see set##name */ virtual type get##name() const = 0; /** \@copybrief get##name \@see get##name */ virtual void set##name(type val) = 0;" \
-                         "CV_IMPL_PROPERTY_S(type, name, x)= /** \@see set##name */ virtual type get##name() const = 0; /** \@copybrief get##name \@see get##name */ virtual void set##name(const type & val);" \
-                         "CV_IMPL_PROPERTY_RO(type, name, x)= virtual type get##name() const;"
+                         FLANN_DEPRECATED=
 EXPAND_AS_DEFINED      =
 SKIP_FUNCTION_MACROS   = YES
 TAGFILES               =
diff --git a/doc/DoxygenLayout.xml b/doc/DoxygenLayout.xml
index b2675719c..149f36f52 100644
--- a/doc/DoxygenLayout.xml
+++ b/doc/DoxygenLayout.xml
@@ -17,6 +17,7 @@
       <tab type="globals" visible="yes" title="Global objects" intro=""/>
     </tab>
     <tab type="examples" visible="yes" title="" intro=""/>
+    <tab type="user" url="/3.0-last-rst" title="Sphinx Documentation"/>
   </navindex>
 
   <!-- Layout definition for a class page -->
diff --git a/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown b/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown
index 0b2364396..a7bd1f059 100644
--- a/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown
+++ b/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown
@@ -30,7 +30,7 @@ y_{corrected} = y + [ p_1(r^2+ 2y^2)+ 2p_2xy]\f]
 So we have five distortion parameters which in OpenCV are presented as one row matrix with 5
 columns:
 
-\f[Distortion_{coefficients}=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)\f]
+\f[distortion\_coefficients=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)\f]
 
 Now for the unit conversion we use the following formula:
 
@@ -96,83 +96,30 @@ on how to do this you can find in the @ref tutorial_file_input_output_with_xml_y
 Explanation
 -----------
 
--#  **Read the settings.**
-    @code{.cpp}
-    Settings s;
-    const string inputSettingsFile = argc > 1 ? argv[1] : "default.xml";
-    FileStorage fs(inputSettingsFile, FileStorage::READ); // Read the settings
-    if (!fs.isOpened())
-    {
-          cout << "Could not open the configuration file: \"" << inputSettingsFile << "\"" << endl;
-          return -1;
-    }
-    fs["Settings"] >> s;
-    fs.release();                                         // close Settings file
+-#  **Read the settings**
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp file_read
 
-    if (!s.goodInput)
-    {
-          cout << "Invalid input detected. Application stopping. " << endl;
-          return -1;
-    }
-    @endcode
     For this I've used simple OpenCV class input operation. After reading the file I've an
     additional post-processing function that checks validity of the input. Only if all inputs are
     good then *goodInput* variable will be true.
 
--#  **Get next input, if it fails or we have enough of them - calibrate**. After this we have a big
+-#  **Get next input, if it fails or we have enough of them - calibrate**
+
+    After this we have a big
     loop where we do the following operations: get the next image from the image list, camera or
     video file. If this fails or we have enough images then we run the calibration process. In case
     of image we step out of the loop and otherwise the remaining frames will be undistorted (if the
     option is set) via changing from *DETECTION* mode to the *CALIBRATED* one.
-    @code{.cpp}
-    for(int i = 0;;++i)
-    {
-      Mat view;
-      bool blinkOutput = false;
-
-      view = s.nextImage();
-
-      //-----  If no more image, or got enough, then stop calibration and show result -------------
-      if( mode == CAPTURING && imagePoints.size() >= (unsigned)s.nrFrames )
-      {
-            if( runCalibrationAndSave(s, imageSize,  cameraMatrix, distCoeffs, imagePoints))
-                  mode = CALIBRATED;
-            else
-                  mode = DETECTION;
-      }
-      if(view.empty())          // If no more images then run calibration, save and stop loop.
-      {
-                if( imagePoints.size() > 0 )
-                      runCalibrationAndSave(s, imageSize,  cameraMatrix, distCoeffs, imagePoints);
-                break;
-      imageSize = view.size();  // Format input image.
-      if( s.flipVertical )    flip( view, view, 0 );
-      }
-    @endcode
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp get_input
     For some cameras we may need to flip the input image. Here we do this too.
 
--#  **Find the pattern in the current input**. The formation of the equations I mentioned above aims
+-#  **Find the pattern in the current input**
+
+    The formation of the equations I mentioned above aims
     to finding major patterns in the input: in case of the chessboard this are corners of the
     squares and for the circles, well, the circles themselves. The position of these will form the
     result which will be written into the *pointBuf* vector.
-    @code{.cpp}
-    vector<Point2f> pointBuf;
-
-    bool found;
-    switch( s.calibrationPattern ) // Find feature points on the input format
-    {
-    case Settings::CHESSBOARD:
-      found = findChessboardCorners( view, s.boardSize, pointBuf,
-      CALIB_CB_ADAPTIVE_THRESH | CALIB_CB_FAST_CHECK | CALIB_CB_NORMALIZE_IMAGE);
-      break;
-    case Settings::CIRCLES_GRID:
-      found = findCirclesGrid( view, s.boardSize, pointBuf );
-      break;
-    case Settings::ASYMMETRIC_CIRCLES_GRID:
-      found = findCirclesGrid( view, s.boardSize, pointBuf, CALIB_CB_ASYMMETRIC_GRID );
-      break;
-    }
-    @endcode
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp find_pattern
     Depending on the type of the input pattern you use either the @ref cv::findChessboardCorners or
     the @ref cv::findCirclesGrid function. For both of them you pass the current image and the size
     of the board and you'll get the positions of the patterns. Furthermore, they return a boolean
@@ -188,109 +135,27 @@ Explanation
     *imagePoints* vector to collect all of the equations into a single container. Finally, for
     visualization feedback purposes we will draw the found points on the input image using @ref
     cv::findChessboardCorners function.
-    @code{.cpp}
-    if ( found)                // If done with success,
-      {
-          // improve the found corners' coordinate accuracy for chessboard
-            if( s.calibrationPattern == Settings::CHESSBOARD)
-            {
-                Mat viewGray;
-                cvtColor(view, viewGray, COLOR_BGR2GRAY);
-                cornerSubPix( viewGray, pointBuf, Size(11,11),
-                  Size(-1,-1), TermCriteria( TermCriteria::EPS+TermCriteria::MAX_ITER, 30, 0.1 ));
-            }
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp pattern_found
+-#  **Show state and result to the user, plus command line control of the application**
 
-            if( mode == CAPTURING &&  // For camera only take new samples after delay time
-                (!s.inputCapture.isOpened() || clock() - prevTimestamp > s.delay*1e-3*CLOCKS_PER_SEC) )
-            {
-                imagePoints.push_back(pointBuf);
-                prevTimestamp = clock();
-                blinkOutput = s.inputCapture.isOpened();
-            }
-
-            // Draw the corners.
-            drawChessboardCorners( view, s.boardSize, Mat(pointBuf), found );
-      }
-    @endcode
--#  **Show state and result to the user, plus command line control of the application**. This part
-    shows text output on the image.
-    @code{.cpp}
-    //----------------------------- Output Text ------------------------------------------------
-    string msg = (mode == CAPTURING) ? "100/100" :
-              mode == CALIBRATED ? "Calibrated" : "Press 'g' to start";
-    int baseLine = 0;
-    Size textSize = getTextSize(msg, 1, 1, 1, &baseLine);
-    Point textOrigin(view.cols - 2*textSize.width - 10, view.rows - 2*baseLine - 10);
-
-    if( mode == CAPTURING )
-    {
-      if(s.showUndistorsed)
-        msg = format( "%d/%d Undist", (int)imagePoints.size(), s.nrFrames );
-      else
-        msg = format( "%d/%d", (int)imagePoints.size(), s.nrFrames );
-    }
-
-    putText( view, msg, textOrigin, 1, 1, mode == CALIBRATED ?  GREEN : RED);
-
-    if( blinkOutput )
-       bitwise_not(view, view);
-    @endcode
+    This part shows text output on the image.
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp output_text
     If we ran calibration and got camera's matrix with the distortion coefficients we may want to
     correct the image using @ref cv::undistort function:
-    @code{.cpp}
-    //------------------------- Video capture  output  undistorted ------------------------------
-    if( mode == CALIBRATED && s.showUndistorsed )
-    {
-      Mat temp = view.clone();
-      undistort(temp, view, cameraMatrix, distCoeffs);
-    }
-    //------------------------------ Show image and check for input commands -------------------
-    imshow("Image View", view);
-    @endcode
-    Then we wait for an input key and if this is *u* we toggle the distortion removal, if it is *g*
-    we start again the detection process, and finally for the *ESC* key we quit the application:
-    @code{.cpp}
-    char key =  waitKey(s.inputCapture.isOpened() ? 50 : s.delay);
-    if( key  == ESC_KEY )
-          break;
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp output_undistorted
+    Then we show the image and wait for an input key and if this is *u* we toggle the distortion removal,
+    if it is *g* we start again the detection process, and finally for the *ESC* key we quit the application:
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp await_input
+-#  **Show the distortion removal for the images too**
 
-    if( key == 'u' && mode == CALIBRATED )
-       s.showUndistorsed = !s.showUndistorsed;
-
-    if( s.inputCapture.isOpened() && key == 'g' )
-    {
-      mode = CAPTURING;
-      imagePoints.clear();
-    }
-    @endcode
--#  **Show the distortion removal for the images too**. When you work with an image list it is not
+    When you work with an image list it is not
     possible to remove the distortion inside the loop. Therefore, you must do this after the loop.
     Taking advantage of this now I'll expand the @ref cv::undistort function, which is in fact first
     calls @ref cv::initUndistortRectifyMap to find transformation matrices and then performs
     transformation using @ref cv::remap function. Because, after successful calibration map
     calculation needs to be done only once, by using this expanded form you may speed up your
     application:
-    @code{.cpp}
-    if( s.inputType == Settings::IMAGE_LIST && s.showUndistorsed )
-    {
-      Mat view, rview, map1, map2;
-      initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(),
-          getOptimalNewCameraMatrix(cameraMatrix, distCoeffs, imageSize, 1, imageSize, 0),
-          imageSize, CV_16SC2, map1, map2);
-
-      for(int i = 0; i < (int)s.imageList.size(); i++ )
-      {
-          view = imread(s.imageList[i], 1);
-          if(view.empty())
-              continue;
-          remap(view, rview, map1, map2, INTER_LINEAR);
-          imshow("Image View", rview);
-          char c = waitKey();
-          if( c  == ESC_KEY || c == 'q' || c == 'Q' )
-              break;
-      }
-    }
-    @endcode
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp show_results
 
 The calibration and save
 ------------------------
@@ -304,24 +169,7 @@ Therefore in the first function we just split up these two processes. Because we
 of the calibration variables we'll create these variables here and pass on both of them to the
 calibration and saving function. Again, I'll not show the saving part as that has little in common
 with the calibration. Explore the source file in order to find out how and what:
-@code{.cpp}
-bool runCalibrationAndSave(Settings& s, Size imageSize, Mat&  cameraMatrix, Mat& distCoeffs,vector<vector<Point2f> > imagePoints )
-{
- vector<Mat> rvecs, tvecs;
- vector<float> reprojErrs;
- double totalAvgErr = 0;
-
- bool ok = runCalibration(s,imageSize, cameraMatrix, distCoeffs, imagePoints, rvecs, tvecs,
-                          reprojErrs, totalAvgErr);
- cout << (ok ? "Calibration succeeded" : "Calibration failed")
-     << ". avg re projection error = "  << totalAvgErr ;
-
- if( ok )   // save only if the calibration was done with success
-     saveCameraParams( s, imageSize, cameraMatrix, distCoeffs, rvecs ,tvecs, reprojErrs,
-                         imagePoints, totalAvgErr);
- return ok;
-}
-@endcode
+@snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp run_and_save
 We do the calibration with the help of the @ref cv::calibrateCamera function. It has the following
 parameters:
 
@@ -331,29 +179,7 @@ parameters:
     present. Because, we use a single pattern for all the input images we can calculate this just
     once and multiply it for all the other input views. We calculate the corner points with the
     *calcBoardCornerPositions* function as:
-    @code{.cpp}
-    void calcBoardCornerPositions(Size boardSize, float squareSize, vector<Point3f>& corners,
-                      Settings::Pattern patternType /*= Settings::CHESSBOARD*/)
-    {
-    corners.clear();
-
-    switch(patternType)
-    {
-    case Settings::CHESSBOARD:
-    case Settings::CIRCLES_GRID:
-      for( int i = 0; i < boardSize.height; ++i )
-        for( int j = 0; j < boardSize.width; ++j )
-            corners.push_back(Point3f(float( j*squareSize ), float( i*squareSize ), 0));
-      break;
-
-    case Settings::ASYMMETRIC_CIRCLES_GRID:
-      for( int i = 0; i < boardSize.height; i++ )
-         for( int j = 0; j < boardSize.width; j++ )
-            corners.push_back(Point3f(float((2*j + i % 2)*squareSize), float(i*squareSize), 0));
-      break;
-    }
-    }
-    @endcode
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp board_corners
     And then multiply it as:
     @code{.cpp}
     vector<vector<Point3f> > objectPoints(1);
@@ -365,12 +191,8 @@ parameters:
     circle pattern). We have already collected this from @ref cv::findChessboardCorners or @ref
     cv::findCirclesGrid function. We just need to pass it on.
 -   The size of the image acquired from the camera, video file or the images.
--   The camera matrix. If we used the fixed aspect ratio option we need to set the \f$f_x\f$ to zero:
-    @code{.cpp}
-    cameraMatrix = Mat::eye(3, 3, CV_64F);
-    if( s.flag & CALIB_FIX_ASPECT_RATIO )
-         cameraMatrix.at<double>(0,0) = 1.0;
-    @endcode
+-   The camera matrix. If we used the fixed aspect ratio option we need to set \f$f_x\f$:
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp fixed_aspect
 -   The distortion coefficient matrix. Initialize with zero.
     @code{.cpp}
     distCoeffs = Mat::zeros(8, 1, CV_64F);
@@ -393,33 +215,7 @@ double rms = calibrateCamera(objectPoints, imagePoints, imageSize, cameraMatrix,
     calculate the absolute norm between what we got with our transformation and the corner/circle
     finding algorithm. To find the average error we calculate the arithmetical mean of the errors
     calculated for all the calibration images.
-    @code{.cpp}
-    double computeReprojectionErrors( const vector<vector<Point3f> >& objectPoints,
-                              const vector<vector<Point2f> >& imagePoints,
-                              const vector<Mat>& rvecs, const vector<Mat>& tvecs,
-                              const Mat& cameraMatrix , const Mat& distCoeffs,
-                              vector<float>& perViewErrors)
-    {
-    vector<Point2f> imagePoints2;
-    int i, totalPoints = 0;
-    double totalErr = 0, err;
-    perViewErrors.resize(objectPoints.size());
-
-    for( i = 0; i < (int)objectPoints.size(); ++i )
-    {
-      projectPoints( Mat(objectPoints[i]), rvecs[i], tvecs[i], cameraMatrix,  // project
-                                           distCoeffs, imagePoints2);
-      err = norm(Mat(imagePoints[i]), Mat(imagePoints2), NORM_L2);              // difference
-
-      int n = (int)objectPoints[i].size();
-      perViewErrors[i] = (float) std::sqrt(err*err/n);                        // save for this view
-      totalErr        += err*err;                                             // sum it up
-      totalPoints     += n;
-    }
-
-    return std::sqrt(totalErr/totalPoints);              // calculate the arithmetical mean
-    }
-    @endcode
+    @snippet samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp compute_errors
 
 Results
 -------
@@ -461,20 +257,20 @@ the input. Here's, how a detected pattern should look:
 In both cases in the specified output XML/YAML file you'll find the camera and distortion
 coefficients matrices:
 @code{.xml}
-<Camera_Matrix type_id="opencv-matrix">
+<camera_matrix type_id="opencv-matrix">
 <rows>3</rows>
 <cols>3</cols>
 <dt>d</dt>
 <data>
  6.5746697944293521e+002 0. 3.1950000000000000e+002 0.
- 6.5746697944293521e+002 2.3950000000000000e+002 0. 0. 1.</data></Camera_Matrix>
-<Distortion_Coefficients type_id="opencv-matrix">
+ 6.5746697944293521e+002 2.3950000000000000e+002 0. 0. 1.</data></camera_matrix>
+<distortion_coefficients type_id="opencv-matrix">
 <rows>5</rows>
 <cols>1</cols>
 <dt>d</dt>
 <data>
  -4.1802327176423804e-001 5.0715244063187526e-001 0. 0.
- -5.7843597214487474e-001</data></Distortion_Coefficients>
+ -5.7843597214487474e-001</data></distortion_coefficients>
 @endcode
 Add these values as constants to your program, call the @ref cv::initUndistortRectifyMap and the
 @ref cv::remap function to remove distortion and enjoy distortion free inputs for cheap and low
diff --git a/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown b/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown
index 9cf4e216a..f91595ece 100644
--- a/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown
+++ b/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown
@@ -74,7 +74,7 @@ available methods are 6:
 
 -#  **method=CV_TM_CCOEFF**
 
-    \f[R(x,y)= \sum _{x',y'} (T'(x',y')  \cdot I(x+x',y+y'))\f]
+    \f[R(x,y)= \sum _{x',y'} (T'(x',y')  \cdot I'(x+x',y+y'))\f]
 
     where
 
diff --git a/doc/tutorials/introduction/table_of_content_introduction.markdown b/doc/tutorials/introduction/table_of_content_introduction.markdown
index 6ce2a3bba..35a815f67 100644
--- a/doc/tutorials/introduction/table_of_content_introduction.markdown
+++ b/doc/tutorials/introduction/table_of_content_introduction.markdown
@@ -149,3 +149,9 @@ Additionally you can find very basic sample source code to introduce you to the
     _Author:_ Maksim Shabunin
 
     This tutorial describes new documenting process and some useful Doxygen features.
+
+-   @subpage tutorial_transition_guide
+
+    _Author:_ Maksim Shabunin
+
+    This document describes some aspects of 2.4 -> 3.0 transition process.
diff --git a/doc/tutorials/introduction/transition_guide/transition_guide.markdown b/doc/tutorials/introduction/transition_guide/transition_guide.markdown
new file mode 100644
index 000000000..5f3e35150
--- /dev/null
+++ b/doc/tutorials/introduction/transition_guide/transition_guide.markdown
@@ -0,0 +1,259 @@
+Transition guide {#tutorial_transition_guide}
+================
+
+@tableofcontents
+
+Changes overview {#tutorial_transition_overview}
+================
+This document is intended for software developers who want to migrate their code to OpenCV 3.0.
+
+OpenCV 3.0 introduced many new algorithms and features compared to version 2.4. Some modules have been rewritten, some have been reorganized. Although most of the algorithms from 2.4 are still present, the interfaces can differ.
+
+This section describes the most notable changes in general; all details and examples of transition actions are in the next part of the document.
+
+##### Contrib repository
+<https://github.com/Itseez/opencv_contrib>
+
+This is a place for all new, experimental and non-free algorithms. It does not receive as much attention from the support team as the main repository, but the community makes an effort to keep it in good shape.
+
+To build OpenCV with _contrib_ repository, add the following option to your cmake command:
+@code{.sh}
+-DOPENCV_EXTRA_MODULES_PATH=<path-to-opencv_contrib>/modules
+@endcode
+
+##### Headers layout
+In 2.4 all headers are located in the corresponding module subfolder (_opencv2/\<module\>/\<module\>.hpp_). In 3.0 there are top-level module headers containing most of the module functionality (_opencv2/\<module\>.hpp_), and all C-style API definitions have been moved to separate headers (for example opencv2/core/core_c.h).
+
+##### Algorithm interfaces
+General algorithm usage pattern has changed: an algorithm must now be created on the heap and wrapped in the smart pointer cv::Ptr. Version 2.4 allowed both stack and heap allocations, directly or via smart pointer.
+
+_get_ and _set_ methods have been removed from the cv::Algorithm class along with _CV_INIT_ALGORITHM_ macro. In 3.0 all properties have been converted to the pairs of _getProperty/setProperty_ pure virtual methods. As a result it is __not__ possible to create and use cv::Algorithm instance by name (using generic _Algorithm::create(String)_ method), one should call corresponding factory method explicitly.
+
+##### Changed modules
+-   _ml_ module has been rewritten
+-   _highgui_ module has been split into parts: _imgcodecs_, _videoio_ and _highgui_ itself
+-   _features2d_ module has been reorganized (some feature detectors have been moved to _opencv_contrib/xfeatures2d_ module)
+-   _legacy_, _nonfree_ modules have been removed. Some algorithms have been moved to different locations and some have been completely rewritten or removed
+-   CUDA API has been updated (_gpu_ module -> several _cuda_ modules, namespace _gpu_ -> namespace _cuda_)
+-   OpenCL API has changed (_ocl_ module has been removed, separate _ocl::_ implementations -> Transparent API)
+-   Some other methods and classes have been relocated
+
+Transition hints {#tutorial_transition_hints}
+================
+This section describes concrete actions with examples.
+
+Prepare 2.4 {#tutorial_transition_hints_24}
+-----------
+Some changes made in the latest 2.4.11 OpenCV version allow you to prepare your current codebase for migration:
+
+- cv::makePtr function is now available
+- _opencv2/\<module\>.hpp_ headers have been created
+
+New headers layout {#tutorial_transition_hints_headers}
+------------------
+__Note:__
+Changes intended to ease the migration have been made in OpenCV 3.0, thus the following instructions are not necessary, but recommended.
+
+1. Replace inclusions of old module headers:
+@code{.cpp}
+// old header
+#include "opencv2/<module>/<module>.hpp"
+// new header
+#include "opencv2/<module>.hpp"
+@endcode
+
+2. If your code is using C API (`cv*` functions, `Cv*` structures or `CV_*` enumerations), include corresponding `*_c.h` headers. Although it is recommended to use C++ API, most of C-functions are still accessible in separate header files (opencv2/core/core_c.h, opencv2/core/types_c.h, opencv2/imgproc/imgproc_c.h, etc.).
+
+Modern way to use algorithm {#tutorial_transition_algorithm}
+---------------------------
+1.  Algorithm instances must be created with cv::makePtr function or corresponding static factory method if available:
+    @code{.cpp}
+    // good ways
+    Ptr<SomeAlgo> algo = makePtr<SomeAlgo>(...);
+    Ptr<SomeAlgo> algo = SomeAlgo::create(...);
+    @endcode
+    Other ways are deprecated:
+    @code{.cpp}
+    // bad ways
+    Ptr<SomeAlgo> algo = new SomeAlgo(...);
+    SomeAlgo * algo = new SomeAlgo(...);
+    SomeAlgo algo(...);
+    Ptr<SomeAlgo> algo = Algorithm::create<SomeAlgo>("name");
+    @endcode
+
+2.  Algorithm properties should be accessed via corresponding virtual methods, _getSomeProperty/setSomeProperty_, generic _get/set_ methods have been removed:
+    @code{.cpp}
+    // good way
+    double clipLimit = clahe->getClipLimit();
+    clahe->setClipLimit(clipLimit);
+    // bad way
+    double clipLimit = clahe->getDouble("clipLimit");
+    clahe->set("clipLimit", clipLimit);
+    clahe->setDouble("clipLimit", clipLimit);
+    @endcode
+
+
+3.  Remove `initModule_<moduleName>()` calls
+
+Machine learning module {#tutorial_transition_hints_ml}
+-----------------------
+Since this module has been rewritten, it will take some effort to adapt your software to it. All algorithms are located in separate _ml_ namespace along with their base class _StatModel_. Separate _SomeAlgoParams_ classes have been replaced with sets of corresponding _getProperty/setProperty_ methods.
+
+The following table illustrates correspondence between 2.4 and 3.0 machine learning classes.
+
+|       2.4 | 3.0       |
+| --------- | --------- |
+| CvStatModel | cv::ml::StatModel |
+| CvNormalBayesClassifier | cv::ml::NormalBayesClassifier |
+| CvKNearest | cv::ml::KNearest |
+| CvSVM | cv::ml::SVM |
+| CvDTree | cv::ml::DTrees |
+| CvBoost | cv::ml::Boost |
+| CvGBTrees | _Not implemented_ |
+| CvRTrees | cv::ml::RTrees |
+| CvERTrees | _Not implemented_ |
+| EM | cv::ml::EM |
+| CvANN_MLP | cv::ml::ANN_MLP |
+| _Not implemented_ | cv::ml::LogisticRegression |
+| CvMLData | cv::ml::TrainData |
+
+Although rewritten _ml_ algorithms in 3.0 allow you to load old trained models from _xml/yml_ file, deviations in prediction process are possible.
+
+The following code snippets from the `points_classifier.cpp` example illustrate differences in model training process:
+@code{.cpp}
+using namespace cv;
+// ======== version 2.4 ========
+Mat trainSamples, trainClasses;
+prepare_train_data( trainSamples, trainClasses );
+CvBoost  boost;
+Mat var_types( 1, trainSamples.cols + 1, CV_8UC1, Scalar(CV_VAR_ORDERED) );
+var_types.at<uchar>( trainSamples.cols ) = CV_VAR_CATEGORICAL;
+CvBoostParams  params( CvBoost::DISCRETE, // boost_type
+                       100, // weak_count
+                       0.95, // weight_trim_rate
+                       2, // max_depth
+                       false, //use_surrogates
+                       0 // priors
+                     );
+boost.train( trainSamples, CV_ROW_SAMPLE, trainClasses, Mat(), Mat(), var_types, Mat(), params );
+
+// ======== version 3.0 ========
+Ptr<Boost> boost = Boost::create();
+boost->setBoostType(Boost::DISCRETE);
+boost->setWeakCount(100);
+boost->setWeightTrimRate(0.95);
+boost->setMaxDepth(2);
+boost->setUseSurrogates(false);
+boost->setPriors(Mat());
+boost->train(prepare_train_data()); // 'prepare_train_data' returns an instance of ml::TrainData class
+@endcode
+
+Features detect {#tutorial_transition_hints_features}
+---------------
+Some algorithms (FREAK, BRIEF, SIFT, SURF) have been moved to _opencv_contrib_ repository, to _xfeatures2d_ module, _xfeatures2d_ namespace. Their interface has also been changed (inherit from `cv::Feature2D` base class).
+
+List of _xfeatures2d_ module classes:
+
+- cv::xfeatures2d::BriefDescriptorExtractor - Class for computing BRIEF descriptors (2.4 location: _features2d_)
+- cv::xfeatures2d::FREAK - Class implementing the FREAK (Fast Retina Keypoint) keypoint descriptor (2.4 location: _features2d_)
+- cv::xfeatures2d::StarDetector - The class implements the  CenSurE detector (2.4 location: _features2d_)
+- cv::xfeatures2d::SIFT - Class for extracting keypoints and computing descriptors using the Scale Invariant Feature Transform (SIFT) algorithm (2.4 location: _nonfree_)
+- cv::xfeatures2d::SURF - Class for extracting Speeded Up Robust Features from an image (2.4 location: _nonfree_)
+
+Following steps are needed:
+1. Add _opencv_contrib_ to compilation process
+2. Include `opencv2/xfeatures2d.hpp` header
+3. Use namespace `xfeatures2d`
+4. Replace `operator()` calls with `detect`, `compute` or `detectAndCompute` if needed
+
+Some classes now use general methods `detect`, `compute` or `detectAndCompute` provided by `Feature2D` base class instead of custom `operator()`
+
+Following code snippets illustrate the difference (from `video_homography.cpp` example):
+@code{.cpp}
+using namespace cv;
+// ====== 2.4 =======
+#include "opencv2/features2d/features2d.hpp"
+BriefDescriptorExtractor brief(32);
+GridAdaptedFeatureDetector detector(new FastFeatureDetector(10, true), DESIRED_FTRS, 4, 4);
+// ...
+detector.detect(gray, query_kpts); //Find interest points
+brief.compute(gray, query_kpts, query_desc); //Compute brief descriptors at each keypoint location
+// ====== 3.0 =======
+#include "opencv2/features2d.hpp"
+#include "opencv2/xfeatures2d.hpp"
+using namespace cv::xfeatures2d;
+Ptr<BriefDescriptorExtractor> brief = BriefDescriptorExtractor::create(32);
+Ptr<FastFeatureDetector> detector = FastFeatureDetector::create(10, true);
+// ...
+detector->detect(gray, query_kpts); //Find interest points
+brief->compute(gray, query_kpts, query_desc); //Compute brief descriptors at each keypoint location
+@endcode
+
+OpenCL {#tutorial_transition_hints_opencl}
+------
+All specialized `ocl` implementations have been hidden behind general C++ algorithm interface. Now the function execution path can be selected dynamically at runtime: CPU or OpenCL; this mechanism is also called "Transparent API".
+
+New class cv::UMat is intended to hide data exchange with OpenCL device in a convenient way.
+
+The following example illustrates API modifications (from [OpenCV site](http://opencv.org/platforms/opencl.html)):
+
+-   OpenCL-aware code OpenCV-2.x
+@code{.cpp}
+// initialization
+VideoCapture vcap(...);
+ocl::OclCascadeClassifier fd("haar_ff.xml");
+ocl::oclMat frame, frameGray;
+Mat frameCpu;
+vector<Rect> faces;
+for(;;){
+    // processing loop
+    vcap >> frameCpu;
+    frame = frameCpu;
+    ocl::cvtColor(frame, frameGray, BGR2GRAY);
+    ocl::equalizeHist(frameGray, frameGray);
+    fd.detectMultiScale(frameGray, faces, ...);
+    // draw rectangles …
+    // show image …
+}
+@endcode
+-   OpenCL-aware code OpenCV-3.x
+@code{.cpp}
+// initialization
+VideoCapture vcap(...);
+CascadeClassifier fd("haar_ff.xml");
+UMat frame, frameGray; // the only change from plain CPU version
+vector<Rect> faces;
+for(;;){
+    // processing loop
+    vcap >> frame;
+    cvtColor(frame, frameGray, BGR2GRAY);
+    equalizeHist(frameGray, frameGray);
+    fd.detectMultiScale(frameGray, faces, ...);
+    // draw rectangles …
+    // show image …
+}
+@endcode
+
+CUDA {#tutorial_transition_hints_cuda}
+----
+_cuda_ module has been split into several smaller pieces:
+- _cuda_ - @ref cuda
+- _cudaarithm_ - @ref cudaarithm
+- _cudabgsegm_ - @ref cudabgsegm
+- _cudacodec_ - @ref cudacodec
+- _cudafeatures2d_ - @ref cudafeatures2d
+- _cudafilters_ - @ref cudafilters
+- _cudaimgproc_ - @ref cudaimgproc
+- _cudalegacy_ - @ref cudalegacy
+- _cudaoptflow_ - @ref cudaoptflow
+- _cudastereo_ - @ref cudastereo
+- _cudawarping_ - @ref cudawarping
+- _cudev_ - @ref cudev
+
+`gpu` namespace has been removed, use cv::cuda namespace instead. Many classes have also been renamed, for example:
+- `gpu::FAST_GPU` -> cv::cuda::FastFeatureDetector
+- `gpu::createBoxFilter_GPU` -> cv::cuda::createBoxFilter
+
+Documentation format {#tutorial_transition_docs}
+--------------------
+Documentation has been converted to Doxygen format. You can find updated documentation writing guide in _Tutorials_ section of _OpenCV_ reference documentation (@ref tutorial_documentation).
diff --git a/doc/user_guide/ug_traincascade.markdown b/doc/user_guide/ug_traincascade.markdown
index d35ec6f5f..1bc7ff5f9 100644
--- a/doc/user_guide/ug_traincascade.markdown
+++ b/doc/user_guide/ug_traincascade.markdown
@@ -256,6 +256,12 @@ Command line arguments of opencv_traincascade application grouped by purposes:
         Maximum number of threads to use during training. Notice that the actual number of used
         threads may be lower, depending on your machine and compilation options.
 
+    -   -acceptanceRatioBreakValue \<break_value\>
+
+        This argument determines how precisely your model should keep learning and when to stop.
+        A good guideline is to train not further than 10e-5, to ensure the model does not overtrain on your training data.
+        By default this value is set to -1 to disable this feature.
+
 -#  Cascade parameters:
 
     -   -stageType \<BOOST(default)\>
diff --git a/include/opencv/cvaux.h b/include/opencv/cvaux.h
index cb49c086b..fe86c5d98 100644
--- a/include/opencv/cvaux.h
+++ b/include/opencv/cvaux.h
@@ -51,12 +51,6 @@
 #include "opencv2/photo/photo_c.h"
 #include "opencv2/video/tracking_c.h"
 #include "opencv2/objdetect/objdetect_c.h"
-#include "opencv2/contrib/compat.hpp"
-
-#include "opencv2/legacy.hpp"
-#include "opencv2/legacy/compat.hpp"
-#include "opencv2/legacy/blobtrack.hpp"
-
 
 #endif
 
diff --git a/modules/androidcamera/camera_wrapper/camera_wrapper.cpp b/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
index 2a7d01b22..66678c558 100644
--- a/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
+++ b/modules/androidcamera/camera_wrapper/camera_wrapper.cpp
@@ -314,7 +314,9 @@ public:
         cameraId(0),
         cameraCallback(callback),
         userData(_userData),
-        emptyCameraCallbackReported(0)
+        emptyCameraCallbackReported(0),
+        width(),
+        height()
     {
         LOGD("Instantiated new CameraHandler (%p, %p)", callback, _userData);
         void* params_buffer = operator new(sizeof(CameraParameters) + MAGIC_TAIL);
@@ -1122,7 +1124,7 @@ void CameraHandler::applyProperties(CameraHandler** ppcameraHandler)
     if (handler == NULL) {
         LOGE("ERROR in applyProperties --- cannot reinit camera");
         handler=initCameraConnect(cameraCallback, cameraId, userData, NULL);
-        LOGD("CameraHandler::applyProperties(): repeate initCameraConnect after ERROR, handler=0x%x", (int)handler);
+        LOGD("CameraHandler::applyProperties(): repeat initCameraConnect after ERROR, handler=0x%x", (int)handler);
         if (handler == NULL) {
             LOGE("ERROR in applyProperties --- cannot reinit camera AGAIN --- cannot do anything else");
         }
diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index d059eed94..7b01a7bbc 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -184,7 +184,8 @@ namespace cv
 
 //! type of the robust estimation algorithm
 enum { LMEDS  = 4, //!< least-median algorithm
-       RANSAC = 8  //!< RANSAC algorithm
+       RANSAC = 8, //!< RANSAC algorithm
+       RHO    = 16 //!< RHO algorithm
      };
 
 enum { SOLVEPNP_ITERATIVE = 0,
@@ -265,8 +266,9 @@ a vector\<Point2f\> .
 -   **0** - a regular method using all the points
 -   **RANSAC** - RANSAC-based robust method
 -   **LMEDS** - Least-Median robust method
+-   **RHO**    - PROSAC-based robust method
 @param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier
-(used in the RANSAC method only). That is, if
+(used in the RANSAC and RHO methods only). That is, if
 \f[\| \texttt{dstPoints} _i -  \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|  >  \texttt{ransacReprojThreshold}\f]
 then the point \f$i\f$ is considered an outlier. If srcPoints and dstPoints are measured in pixels,
 it usually makes sense to set this parameter somewhere in the range of 1 to 10.
@@ -289,7 +291,7 @@ pairs to compute an initial homography estimate with a simple least-squares sche
 
 However, if not all of the point pairs ( \f$srcPoints_i\f$, \f$dstPoints_i\f$ ) fit the rigid perspective
 transformation (that is, there are some outliers), this initial estimate will be poor. In this case,
-you can use one of the two robust methods. Both methods, RANSAC and LMeDS , try many different
+you can use one of the three robust methods. The methods RANSAC, LMeDS and RHO try many different
 random subsets of the corresponding point pairs (of four pairs each), estimate the homography matrix
 using this subset and a simple least-square algorithm, and then compute the quality/goodness of the
 computed homography (which is the number of inliers for RANSAC or the median re-projection error for
@@ -300,7 +302,7 @@ Regardless of the method, robust or not, the computed homography matrix is refin
 inliers only in case of a robust method) with the Levenberg-Marquardt method to reduce the
 re-projection error even more.
 
-The method RANSAC can handle practically any ratio of outliers but it needs a threshold to
+The methods RANSAC and RHO can handle practically any ratio of outliers but need a threshold to
 distinguish inliers from outliers. The method LMeDS does not need any threshold but it works
 correctly only when there are more than 50% of inliers. Finally, if there are no outliers and the
 noise is rather small, use the default method (method=0).
@@ -512,6 +514,16 @@ projections, as well as the camera matrix and the distortion coefficients.
 @note
    -   An example of how to use solvePnP for planar augmented reality can be found at
         opencv_source_code/samples/python2/plane_ar.py
+   -   If you are using Python:
+        - Numpy array slices won't work as input because solvePnP requires contiguous
+        arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of
+        modules/calib3d/src/solvepnp.cpp version 2.4.9)
+        - The P3P algorithm requires image points to be in an array of shape (N,1,2) due
+        to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
+        which requires 2-channel information.
+        - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
+        it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
+        np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
  */
 CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
                             InputArray cameraMatrix, InputArray distCoeffs,
diff --git a/modules/calib3d/perf/perf_pnp.cpp b/modules/calib3d/perf/perf_pnp.cpp
index 55584f819..e5a92bf1c 100644
--- a/modules/calib3d/perf/perf_pnp.cpp
+++ b/modules/calib3d/perf/perf_pnp.cpp
@@ -19,8 +19,8 @@ typedef perf::TestBaseWithParam<int> PointsNum;
 
 PERF_TEST_P(PointsNum_Algo, solvePnP,
             testing::Combine(
-                testing::Values(4, 3*9, 7*13), //TODO: find why results on 4 points are too unstable
-                testing::Values((int)SOLVEPNP_ITERATIVE, (int)SOLVEPNP_EPNP)
+                testing::Values(5, 3*9, 7*13), //TODO: find why results on 4 points are too unstable
+                testing::Values((int)SOLVEPNP_ITERATIVE, (int)SOLVEPNP_EPNP, (int)SOLVEPNP_UPNP, (int)SOLVEPNP_DLS)
                 )
             )
 {
@@ -64,13 +64,15 @@ PERF_TEST_P(PointsNum_Algo, solvePnP,
 
 PERF_TEST_P(PointsNum_Algo, solvePnPSmallPoints,
             testing::Combine(
-                testing::Values(4), //TODO: find why results on 4 points are too unstable
-                testing::Values((int)SOLVEPNP_P3P, (int)SOLVEPNP_DLS, (int)SOLVEPNP_UPNP)
+                testing::Values(5),
+                testing::Values((int)SOLVEPNP_P3P, (int)SOLVEPNP_EPNP, (int)SOLVEPNP_DLS, (int)SOLVEPNP_UPNP)
                 )
             )
 {
     int pointsNum = get<0>(GetParam());
     pnpAlgo algo = get<1>(GetParam());
+    if( algo == SOLVEPNP_P3P )
+        pointsNum = 4;
 
     vector<Point2f> points2d(pointsNum);
     vector<Point3f> points3d(pointsNum);
@@ -92,7 +94,7 @@ PERF_TEST_P(PointsNum_Algo, solvePnPSmallPoints,
 
     //add noise
     Mat noise(1, (int)points2d.size(), CV_32FC2);
-    randu(noise, 0, 0.01);
+    randu(noise, -0.001, 0.001);
     add(points2d, noise, points2d);
 
     declare.in(points3d, points2d);
@@ -107,7 +109,7 @@ PERF_TEST_P(PointsNum_Algo, solvePnPSmallPoints,
     SANITY_CHECK(tvec, 1e-2);
 }
 
-PERF_TEST_P(PointsNum, DISABLED_SolvePnPRansac, testing::Values(4, 3*9, 7*13))
+PERF_TEST_P(PointsNum, DISABLED_SolvePnPRansac, testing::Values(5, 3*9, 7*13))
 {
     int count = GetParam();
 
diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp
index 5a86624c5..70f2aafed 100644
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -1595,7 +1595,10 @@ void cvCalibrationMatrixValues( const CvMat *calibMatr, CvSize imgSize,
         my = imgHeight / apertureHeight;
     } else {
         mx = 1.0;
-        my = *pasp;
+        if(pasp)
+            my = *pasp;
+        else
+            my = 1.0;
     }
 
     /* Calculate fovx and fovy. */
diff --git a/modules/calib3d/src/epnp.cpp b/modules/calib3d/src/epnp.cpp
index edbcaffd3..ec7dfe0ad 100644
--- a/modules/calib3d/src/epnp.cpp
+++ b/modules/calib3d/src/epnp.cpp
@@ -2,7 +2,10 @@
 #include "precomp.hpp"
 #include "epnp.h"
 
-epnp::epnp(const cv::Mat& cameraMatrix, const cv::Mat& opoints, const cv::Mat& ipoints)
+namespace cv
+{
+
+epnp::epnp(const Mat& cameraMatrix, const Mat& opoints, const Mat& ipoints)
 {
   if (cameraMatrix.depth() == CV_32F)
       init_camera_parameters<float>(cameraMatrix);
@@ -17,14 +20,14 @@ epnp::epnp(const cv::Mat& cameraMatrix, const cv::Mat& opoints, const cv::Mat& i
   if (opoints.depth() == ipoints.depth())
   {
     if (opoints.depth() == CV_32F)
-      init_points<cv::Point3f,cv::Point2f>(opoints, ipoints);
+      init_points<Point3f,Point2f>(opoints, ipoints);
     else
-      init_points<cv::Point3d,cv::Point2d>(opoints, ipoints);
+      init_points<Point3d,Point2d>(opoints, ipoints);
   }
   else if (opoints.depth() == CV_32F)
-    init_points<cv::Point3f,cv::Point2d>(opoints, ipoints);
+    init_points<Point3f,Point2d>(opoints, ipoints);
   else
-    init_points<cv::Point3d,cv::Point2f>(opoints, ipoints);
+    init_points<Point3d,Point2f>(opoints, ipoints);
 
   alphas.resize(4 * number_of_correspondences);
   pcs.resize(3 * number_of_correspondences);
@@ -144,7 +147,7 @@ void epnp::compute_pcs(void)
   }
 }
 
-void epnp::compute_pose(cv::Mat& R, cv::Mat& t)
+void epnp::compute_pose(Mat& R, Mat& t)
 {
   choose_control_points();
   compute_barycentric_coordinates();
@@ -189,8 +192,8 @@ void epnp::compute_pose(cv::Mat& R, cv::Mat& t)
   if (rep_errors[2] < rep_errors[1]) N = 2;
   if (rep_errors[3] < rep_errors[N]) N = 3;
 
-  cv::Mat(3, 1, CV_64F, ts[N]).copyTo(t);
-  cv::Mat(3, 3, CV_64F, Rs[N]).copyTo(R);
+  Mat(3, 1, CV_64F, ts[N]).copyTo(t);
+  Mat(3, 3, CV_64F, Rs[N]).copyTo(R);
 }
 
 void epnp::copy_R_and_t(const double R_src[3][3], const double t_src[3],
@@ -621,3 +624,5 @@ void epnp::qr_solve(CvMat * A, CvMat * b, CvMat * X)
     pX[i] = (pb[i] - sum) / A2[i];
   }
 }
+
+}
diff --git a/modules/calib3d/src/epnp.h b/modules/calib3d/src/epnp.h
index 2619f7595..350e9d482 100644
--- a/modules/calib3d/src/epnp.h
+++ b/modules/calib3d/src/epnp.h
@@ -4,6 +4,9 @@
 #include "precomp.hpp"
 #include "opencv2/core/core_c.h"
 
+namespace cv
+{
+
 class epnp {
  public:
   epnp(const cv::Mat& cameraMatrix, const cv::Mat& opoints, const cv::Mat& ipoints);
@@ -78,4 +81,6 @@ class epnp {
   double * A1, * A2;
 };
 
+}
+
 #endif
diff --git a/modules/calib3d/src/fundam.cpp b/modules/calib3d/src/fundam.cpp
index c700ece70..a97ed2c70 100644
--- a/modules/calib3d/src/fundam.cpp
+++ b/modules/calib3d/src/fundam.cpp
@@ -41,6 +41,7 @@
 //M*/
 
 #include "precomp.hpp"
+#include "rho.h"
 #include <iostream>
 
 namespace cv
@@ -69,20 +70,6 @@ static bool haveCollinearPoints( const Mat& m, int count )
 }
 
 
-template<typename T> int compressPoints( T* ptr, const uchar* mask, int mstep, int count )
-{
-    int i, j;
-    for( i = j = 0; i < count; i++ )
-        if( mask[i*mstep] )
-        {
-            if( i > j )
-                ptr[j] = ptr[i];
-            j++;
-        }
-    return j;
-}
-
-
 class HomographyEstimatorCallback : public PointSetRegistrator::Callback
 {
 public:
@@ -273,6 +260,85 @@ public:
 }
 
 
+/* RHO back end of findHomography(): one estimation run; fills _H (3x3 CV_64F) and _tempMask (npoints x 1 CV_8U, 0/1). */
+namespace cv{
+static bool createAndRunRHORegistrator(double confidence,
+                                       int    maxIters,
+                                       double ransacReprojThreshold,
+                                       int    npoints,
+                                       InputArray  _src,
+                                       InputArray  _dst,
+                                       OutputArray _H,
+                                       OutputArray _tempMask){
+    Mat    src = _src.getMat();
+    Mat    dst = _dst.getMat();
+    Mat    tempMask;
+    bool   result;
+    double beta = 0.35;/* 0.35 is a value that often works. */
+
+    /* Create temporary output matrix (RHO outputs a single-precision H only). */
+    Mat tmpH = Mat(3, 3, CV_32FC1);
+
+    /* Create output mask. */
+    tempMask = Mat(npoints, 1, CV_8U);
+
+    /**
+     * Make use of the RHO estimator API.
+     *
+     * This is where the math happens. A homography estimation context is
+     * created here, used once, and dropped when p goes out of scope.
+     */
+
+    Ptr<RHO_HEST> p = rhoInit();
+
+    /**
+     * Optional. Ideally, the context would survive across calls to
+     * findHomography(), but no clean way appears to exist to do so. The price
+     * to pay is marginally more computational work than strictly needed.
+     */
+
+    rhoEnsureCapacity(p, npoints, beta);
+
+    /**
+     * The critical call. All parameters are documented at rhoHest() in rho.cpp.
+     *
+     * Currently, NR (Non-Randomness criterion) and Final Refinement (with
+     * internal, optimized Levenberg-Marquardt method) are enabled. However,
+     * while refinement seems to correctly smooth jitter most of the time, when
+     * refinement fails it tends to make the estimate visually very much worse.
+     * It may be necessary to remove the refinement flags in a future commit if
+     * this behaviour is too problematic.
+     */
+
+    result = !!rhoHest(p,
+                      (const float*)src.data, /* NOTE(review): assumes continuous CV_32F point data - confirm against caller */
+                      (const float*)dst.data,
+                      (char*)       tempMask.data,
+                      (unsigned)    npoints,
+                      (float)       ransacReprojThreshold,
+                      (unsigned)    maxIters, /* maxI   */
+                      (unsigned)    maxIters, /* rConvg */
+                      confidence,
+                      4U,                     /* minInl */
+                      beta,
+                      RHO_FLAG_ENABLE_NR | RHO_FLAG_ENABLE_FINAL_REFINEMENT,
+                      NULL,                   /* guessH: no extrinsic guess */
+                      (float*)tmpH.data);
+
+    /* Convert float homography to double precision. */
+    tmpH.convertTo(_H, CV_64FC1);
+
+    /* Maps non-zero mask elems to 1, for the sake of the testcase. */
+    for(int k=0;k<npoints;k++){
+        tempMask.data[k] = !!tempMask.data[k];
+    }
+    tempMask.copyTo(_tempMask);
+
+    return result;
+}
+}
+
+
 cv::Mat cv::findHomography( InputArray _points1, InputArray _points2,
                             int method, double ransacReprojThreshold, OutputArray _mask,
                             const int maxIters, const double confidence)
@@ -317,13 +383,15 @@ cv::Mat cv::findHomography( InputArray _points1, InputArray _points2,
         result = createRANSACPointSetRegistrator(cb, 4, ransacReprojThreshold, confidence, maxIters)->run(src, dst, H, tempMask);
     else if( method == LMEDS )
         result = createLMeDSPointSetRegistrator(cb, 4, confidence, maxIters)->run(src, dst, H, tempMask);
+    else if( method == RHO )
+        result = createAndRunRHORegistrator(confidence, maxIters, ransacReprojThreshold, npoints, src, dst, H, tempMask);
     else
         CV_Error(Error::StsBadArg, "Unknown estimation method");
 
-    if( result && npoints > 4 )
+    if( result && npoints > 4 && method != RHO)
     {
-        compressPoints( src.ptr<Point2f>(), tempMask.ptr<uchar>(), 1, npoints );
-        npoints = compressPoints( dst.ptr<Point2f>(), tempMask.ptr<uchar>(), 1, npoints );
+        compressElems( src.ptr<Point2f>(), tempMask.ptr<uchar>(), 1, npoints );
+        npoints = compressElems( dst.ptr<Point2f>(), tempMask.ptr<uchar>(), 1, npoints );
         if( npoints > 0 )
         {
             Mat src1 = src.rowRange(0, npoints);
diff --git a/modules/calib3d/src/precomp.hpp b/modules/calib3d/src/precomp.hpp
index e8a81120f..83a513dca 100644
--- a/modules/calib3d/src/precomp.hpp
+++ b/modules/calib3d/src/precomp.hpp
@@ -102,6 +102,19 @@ CV_EXPORTS Ptr<PointSetRegistrator> createRANSACPointSetRegistrator(const Ptr<Po
 CV_EXPORTS Ptr<PointSetRegistrator> createLMeDSPointSetRegistrator(const Ptr<PointSetRegistrator::Callback>& cb,
                                                                    int modelPoints, double confidence=0.99, int maxIters=1000 );
 
+template<typename T> inline int compressElems( T* ptr, const uchar* mask, int mstep, int count )
+{
+    // Stable in-place compaction: keeps ptr[k] where mask[k*mstep] != 0 and returns how many were kept.
+    int kept = 0;
+    for( int rd = 0; rd < count; rd++ )
+    {
+        if( !mask[rd*mstep] ) continue;        // rejected element: the write cursor stays put
+        if( rd > kept ) ptr[kept] = ptr[rd];   // skip the copy while nothing has been dropped yet
+        kept++;
+    }
+    return kept;
+}
+
 }
 
 #endif
diff --git a/modules/calib3d/src/rho.cpp b/modules/calib3d/src/rho.cpp
new file mode 100644
index 000000000..2f27728a2
--- /dev/null
+++ b/modules/calib3d/src/rho.cpp
@@ -0,0 +1,2673 @@
+/*
+  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+
+  By downloading, copying, installing or using the software you agree to this license.
+  If you do not agree to this license, do not download, install,
+  copy or use the software.
+
+
+                          BSD 3-Clause License
+
+ Copyright (C) 2014, Olexa Bilaniuk, Hamid Bazargani & Robert Laganiere, all rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+   * Redistribution's of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+   * Redistribution's in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+
+   * The name of the copyright holders may not be used to endorse or promote products
+     derived from this software without specific prior written permission.
+
+ This software is provided by the copyright holders and contributors "as is" and
+ any express or implied warranties, including, but not limited to, the implied
+ warranties of merchantability and fitness for a particular purpose are disclaimed.
+ In no event shall the Intel Corporation or contributors be liable for any direct,
+ indirect, incidental, special, exemplary, or consequential damages
+ (including, but not limited to, procurement of substitute goods or services;
+ loss of use, data, or profits; or business interruption) however caused
+ and on any theory of liability, whether in contract, strict liability,
+ or tort (including negligence or otherwise) arising in any way out of
+ the use of this software, even if advised of the possibility of such damage.
+*/
+
+/**
+ * Bilaniuk, Olexa, Hamid Bazargani, and Robert Laganiere. "Fast Target
+ * Recognition on Mobile Devices: Revisiting Gaussian Elimination for the
+ * Estimation of Planar Homographies." In Computer Vision and Pattern
+ * Recognition Workshops (CVPRW), 2014 IEEE Conference on, pp. 119-125.
+ * IEEE, 2014.
+ */
+
+/* Includes */
+#include <precomp.hpp>
+#include <opencv2/core.hpp>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stddef.h>
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <vector>
+#include "rho.h"
+
+
+
+
+
+/* For the sake of cv:: namespace ONLY: */
+namespace cv{/* For C support, replace with extern "C" { */
+
+
+/* Constants */
+const int    MEM_ALIGN              = 32;     /* NOTE(review): presumably buffer alignment in bytes - users not visible here */
+const size_t HSIZE                  = (3*3*sizeof(float)); /* Size in bytes of one 3x3 float matrix */
+const double MIN_DELTA_CHNG         = 0.1;
+// const double CHI_STAT               = 2.706;
+const double CHI_SQ                 = 1.645;
+// const double RLO                    = 0.25;
+// const double RHI                    = 0.75;
+const int    MAXLEVMARQITERS        = 100;    /* NOTE(review): presumably the Levenberg-Marquardt iteration cap - confirm at use site */
+const int    SMPL_SIZE              = 4;      /* 4 points required per model */
+const int    SPRT_T_M               = 25;     /* Guessing 25 match evaluations / 1 model generation */
+const int    SPRT_M_S               = 1;      /* 1 model per sample */
+const double SPRT_EPSILON           = 0.1;    /* No explanation */
+const double SPRT_DELTA             = 0.01;   /* No explanation */
+const double LM_GAIN_LO             = 0.25;   /* See sacLMGain(). */
+const double LM_GAIN_HI             = 0.75;   /* See sacLMGain(). */
+
+
+/* Data Structures */
+
+/**
+ * Base Struct for RHO algorithm.
+ *
+ * A RHO estimator has initialization, finalization, capacity, seeding and
+ * homography-estimation APIs that must be implemented.
+ */
+
+struct RHO_HEST{
+    /* This is a virtual base class; It should have a virtual destructor. */
+    virtual ~RHO_HEST(){}
+
+    /* External Interface Methods */
+
+    /**
+     * Initialization work.
+     *
+     * @return 0 if initialization is unsuccessful; non-zero otherwise.
+     */
+
+    virtual inline int    initialize(void){return 1;}
+
+
+    /**
+     * Finalization work.
+     */
+
+    virtual inline void   finalize(void){}
+
+    /**
+     * Ensure that the estimator context's internal table for the non-randomness
+     * criterion is at least of the given size, and uses the given beta. The table
+     * should be larger than the maximum number of matches fed into the estimator.
+     *
+     * A value of N of 0 requests deallocation of the table.
+     *
+     * @param [in] N     If 0, deallocate internal table. If > 0, ensure that the
+     *                   internal table is of at least this size, reallocating if
+     *                   necessary.
+     * @param [in] beta  The beta-factor to use within the table.
+     * @return 0 if unsuccessful; non-zero otherwise.
+     */
+
+    virtual inline int    ensureCapacity(unsigned N, double beta){
+        (void)N;
+        (void)beta;
+
+        return 1;
+    }
+
+
+    /**
+     * Generates a random double uniformly distributed in the range [0, 1).
+     * Only the top 53 bits of the 64-bit output are used, so 1.0 is never returned.
+     * The default implementation uses the xorshift128+ algorithm from
+     * Sebastiano Vigna. Further scramblings of Marsaglia's xorshift generators.
+     * CoRR, abs/1402.6246, 2014.
+     * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+     *
+     * Source roughly as given in
+     * http://en.wikipedia.org/wiki/Xorshift#Xorshift.2B
+     */
+
+    virtual inline double fastRandom(void){
+        uint64_t x = prng.s[0];
+        uint64_t y = prng.s[1];
+        x ^= x << 23; // a
+        x ^= x >> 17; // b
+        x ^= y ^ (y >> 26); // c
+        prng.s[0] = y;
+        prng.s[1] = x;
+        uint64_t s = x + y;
+
+        return (s >> 11) * (1.0 / 9007199254740992.0);/* 53 bits * 2^-53: exact, always < 1 (s * 2^-64 could round to 1.0) */
+    }
+
+
+    /**
+     * Seeds the context's PRNG.
+     *
+     * @param [in] seed  A 64-bit unsigned integer seed.
+     */
+
+    virtual inline void   fastSeed(uint64_t seed){
+        int i;
+
+        prng.s[0] =  seed;
+        prng.s[1] = ~seed;/* Guarantees one of the elements will be non-zero. */
+
+        /**
+         * Escape from zero-land (see xorshift128+ paper). Approximately 20
+         * iterations required according to the graph.
+         */
+
+        for(i=0;i<20;i++){
+            fastRandom();
+        }
+    }
+
+
+    /**
+     * Estimates the homography using the given context, matches and parameters to
+     * PROSAC.
+     *
+     * @param [in]     src     The pointer to the source points of the matches.
+     *                             Cannot be NULL.
+     * @param [in]     dst     The pointer to the destination points of the matches.
+     *                             Cannot be NULL.
+     * @param [out]    inl     The pointer to the output mask of inlier matches.
+     *                             May be NULL.
+     * @param [in]     N       The number of matches.
+     * @param [in]     maxD    The maximum distance.
+     * @param [in]     maxI    The maximum number of PROSAC iterations.
+     * @param [in]     rConvg  The RANSAC convergence parameter.
+     * @param [in]     cfd     The required confidence in the solution.
+     * @param [in]     minInl  The minimum required number of inliers.
+     * @param [in]     beta    The beta-parameter for the non-randomness criterion.
+     * @param [in]     flags   A union of flags to control the estimation.
+     * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
+     *                         none provided.
+     * @param [out]    finalH  The final estimation of H, or the zero matrix if
+     *                         the minimum number of inliers was not met.
+     *                         Cannot be NULL.
+     * @return                 The number of inliers if the minimum number of
+     *                         inliers for acceptance was reached; 0 otherwise.
+     */
+
+    virtual unsigned      rhoHest(const float*   src,     /* Source points */
+                                  const float*   dst,     /* Destination points */
+                                  char*          inl,     /* Inlier mask */
+                                  unsigned       N,       /*  = src.length = dst.length = inl.length */
+                                  float          maxD,    /* Works:     3.0 */
+                                  unsigned       maxI,    /* Works:    2000 */
+                                  unsigned       rConvg,  /* Works:    2000 */
+                                  double         cfd,     /* Works:   0.995 */
+                                  unsigned       minInl,  /* Minimum:     4 */
+                                  double         beta,    /* Works:    0.35 */
+                                  unsigned       flags,   /* Works:       0 */
+                                  const float*   guessH,  /* Extrinsic guess, NULL if none provided */
+                                  float*         finalH) = 0; /* Final result. */
+
+
+
+    /* PRNG XORshift128+ */
+    struct{
+        uint64_t  s[2];            /* PRNG state */
+    } prng;
+};
+
+
+
+/**
+ * Generic C implementation of RHO algorithm.
+ */
+
+struct RHO_HEST_REFC : RHO_HEST{
+    /**
+     * Virtual Arguments.
+     *
+     * Exactly the same as at function call, except:
+     * - minInl is enforced to be >= 4.
+     */
+
+    struct{
+        const float* src;
+        const float* dst;
+        char*        inl;
+        unsigned     N;
+        float        maxD;
+        unsigned     maxI;
+        unsigned     rConvg;
+        double       cfd;
+        unsigned     minInl;
+        double       beta;
+        unsigned     flags;
+        const float* guessH;
+        float*       finalH;
+    } arg;
+
+    /* PROSAC Control */
+    struct{
+        unsigned  i;               /* Iteration Number */
+        unsigned  phNum;           /* Phase Number */
+        unsigned  phEndI;          /* Phase End Iteration */
+        double    phEndFpI;        /* Phase floating-point End Iteration */
+        unsigned  phMax;           /* Termination phase number */
+        unsigned  phNumInl;        /* Number of inliers for termination phase */
+        unsigned  numModels;       /* Number of models tested */
+        unsigned* smpl;            /* Sample of match indexes */
+    } ctrl;
+
+    /* Current model being tested */
+    struct{
+        float*    pkdPts;          /* Packed points */
+        float*    H;               /* Homography */
+        char*     inl;             /* Mask of inliers */
+        unsigned  numInl;          /* Number of inliers */
+    } curr;
+
+    /* Best model (so far) */
+    struct{
+        float*    H;               /* Homography */
+        char*     inl;             /* Mask of inliers */
+        unsigned  numInl;          /* Number of inliers */
+    } best;
+
+    /* Non-randomness criterion */
+    struct{
+        std::vector<unsigned> tbl; /* Non-Randomness: Table */
+        unsigned  size;            /* Non-Randomness: Size */
+        double    beta;            /* Non-Randomness: Beta */
+    } nr;
+
+    /* SPRT Evaluator */
+    struct{
+        double    t_M;             /* t_M */
+        double    m_S;             /* m_S */
+        double    epsilon;         /* Epsilon */
+        double    delta;           /* delta */
+        double    A;               /* SPRT Threshold */
+        unsigned  Ntested;         /* Number of points tested */
+        unsigned  Ntestedtotal;    /* Number of points tested in total */
+        int       good;            /* Good/bad flag */
+        double    lambdaAccept;    /* Accept multiplier */
+        double    lambdaReject;    /* Reject multiplier */
+    } eval;
+
+    /* Levenberg-Marquardt Refinement */
+    struct{
+        float  (* JtJ)[8];         /* JtJ matrix */
+        float  (* tmp1)[8];        /* Temporary 1 */
+        float*    Jte;             /* Jte vector */
+    } lm;
+
+    /* Memory Management */
+    struct{
+        cv::Mat perObj;            /* initialize() reports failure if this is empty after allocatePerObj() */
+        cv::Mat perRun;            /* NOTE(review): presumably per-run storage, see allocatePerRun() - confirm */
+    } mem;
+
+    /* Non-zero once initialize() has succeeded; reset to 0 by finalize(). */
+    int initialized;
+
+
+    /* Constructor and destructor; the copy constructor is private. */
+    public:
+    RHO_HEST_REFC();
+    private: /* Forbid copying. */
+    RHO_HEST_REFC(const RHO_HEST_REFC&);
+    public:
+    ~RHO_HEST_REFC();
+
+    /* Methods to implement external interface */
+    inline int    initialize(void);
+    inline void   finalize(void);
+    inline int    ensureCapacity(unsigned N, double beta);
+    unsigned      rhoHest(const float*   src,     /* Source points */
+                          const float*   dst,     /* Destination points */
+                          char*          inl,     /* Inlier mask */
+                          unsigned       N,       /*  = src.length = dst.length = inl.length */
+                          float          maxD,    /* Works:     3.0 */
+                          unsigned       maxI,    /* Works:    2000 */
+                          unsigned       rConvg,  /* Works:    2000 */
+                          double         cfd,     /* Works:   0.995 */
+                          unsigned       minInl,  /* Minimum:     4 */
+                          double         beta,    /* Works:    0.35 */
+                          unsigned       flags,   /* Works:       0 */
+                          const float*   guessH,  /* Extrinsic guess, NULL if none provided */
+                          float*         finalH); /* Final result. */
+
+
+
+    /* Methods to implement internals */
+    inline void   allocatePerObj(void);
+    inline void   allocatePerRun(void);
+    inline void   deallocatePerRun(void);
+    inline void   deallocatePerObj(void);
+    inline int    initRun(void);
+    inline void   finiRun(void);
+    inline int    haveExtrinsicGuess(void);
+    inline int    hypothesize(void);
+    inline int    verify(void);
+    inline int    isNREnabled(void);
+    inline int    isRefineEnabled(void);
+    inline int    isFinalRefineEnabled(void);
+    inline int    PROSACPhaseEndReached(void);
+    inline void   PROSACGoToNextPhase(void);
+    inline void   getPROSACSample(void);
+    inline void   rndSmpl(unsigned  sampleSize,
+                          unsigned* currentSample,
+                          unsigned  dataSetSize);
+    inline int    isSampleDegenerate(void);
+    inline void   generateModel(void);
+    inline int    isModelDegenerate(void);
+    inline void   evaluateModelSPRT(void);
+    inline void   updateSPRT(void);
+    inline void   designSPRTTest(void);
+    inline int    isBestModel(void);
+    inline int    isBestModelGoodEnough(void);
+    inline void   saveBestModel(void);
+    inline void   nStarOptimize(void);
+    inline void   updateBounds(void);
+    inline void   outputModel(void);
+    inline void   outputZeroH(void);
+    inline int    canRefine(void);
+    inline void   refine(void);
+};
+
+
+
+
+/**
+ * Forward declarations of the file-local (static inline) purely-computational helpers.
+ */
+
+static inline void   sacInitNonRand       (double    beta,
+                                           unsigned  start,
+                                           unsigned  N,
+                                           unsigned* nonRandMinInl);
+static inline double sacInitPEndFpI       (const unsigned ransacConvg,
+                                           const unsigned n,
+                                           const unsigned s);
+static inline unsigned sacCalcIterBound   (double   confidence,
+                                           double   inlierRate,
+                                           unsigned sampleSize,
+                                           unsigned maxIterBound);
+static inline void   hFuncRefC            (float* packedPoints, float* H);
+static inline void   sacCalcJacobianErrors(const float* H,
+                                           const float* src,
+                                           const float* dst,
+                                           const char*  inl,
+                                           unsigned     N,
+                                           float     (* JtJ)[8],
+                                           float*       Jte,
+                                           float*       Sp);
+static inline float  sacLMGain            (const float*  dH,
+                                           const float*  Jte,
+                                           const float   S,
+                                           const float   newS,
+                                           const float   lambda);
+static inline int    sacChol8x8Damped     (const float (*A)[8],
+                                           float         lambda,
+                                           float       (*L)[8]);
+static inline void   sacTRInv8x8          (const float (*L)[8],
+                                           float       (*M)[8]);
+static inline void   sacTRISolve8x8       (const float (*L)[8],
+                                           const float*  Jte,
+                                           float*        dH);
+static inline void   sacSub8x1            (float*       Hout,
+                                           const float* H,
+                                           const float* dH);
+
+
+
+/* Functions */
+
+/**
+ * External access to context constructor.
+ *
+ * @return A pointer to the context if successful; NULL if an error occurred.
+ */
+
+Ptr<RHO_HEST> rhoInit(void){
+    /**
+     * Implementation selection point. Only the generic C implementation
+     * exists for now; SSE2/AVX/AVX2/FMA/NEON variants may be added later and
+     * chosen according to cv::checkHardwareSupport()'s return values.
+     */
+
+#if 1
+    Ptr<RHO_HEST> ctx = Ptr<RHO_HEST>(new RHO_HEST_REFC);
+#endif
+
+    /* Nothing was constructed: hand the empty pointer straight back. */
+    if(!ctx){
+        return ctx;
+    }
+    /* A context whose initialize() reports failure (0) is dropped. */
+    if(!ctx->initialize()){
+        ctx.release();
+    }
+
+    /* Either a ready-to-use context or an empty Ptr. */
+    return ctx;
+}
+
+
+/**
+ * External access to non-randomness table resize: forwards to p->ensureCapacity(N, beta). p is dereferenced unchecked.
+ */
+
+int  rhoEnsureCapacity(Ptr<RHO_HEST> p, unsigned N, double beta){
+    return p->ensureCapacity(N, beta);
+}
+
+
+/**
+ * Seeds the internal PRNG with the given seed by forwarding to p->fastSeed(seed). p is dereferenced unchecked.
+ */
+
+void rhoSeed(Ptr<RHO_HEST> p, uint64_t seed){
+    p->fastSeed(seed);
+}
+
+
+/**
+ * Estimates the homography using the given context, matches and parameters to
+ * PROSAC.
+ *
+ * @param [in/out] p       The context to use for homography estimation. Must
+ *                             be already initialized. Cannot be NULL.
+ * @param [in]     src     The pointer to the source points of the matches.
+ *                             Must be aligned to 4 bytes. Cannot be NULL.
+ * @param [in]     dst     The pointer to the destination points of the matches.
+ *                             Must be aligned to 16 bytes. Cannot be NULL.
+ * @param [out]    inl     The pointer to the output mask of inlier matches.
+ *                             Must be aligned to 16 bytes. May be NULL.
+ * @param [in]     N       The number of matches.
+ * @param [in]     maxD    The maximum distance.
+ * @param [in]     maxI    The maximum number of PROSAC iterations.
+ * @param [in]     rConvg  The RANSAC convergence parameter.
+ * @param [in]     cfd     The required confidence in the solution.
+ * @param [in]     minInl  The minimum required number of inliers.
+ * @param [in]     beta    The beta-parameter for the non-randomness criterion.
+ * @param [in]     flags   A union of flags to control the estimation.
+ * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
+ *                         none provided.
+ * @param [out]    finalH  The final estimation of H, or the zero matrix if
+ *                         the minimum number of inliers was not met.
+ *                         Cannot be NULL.
+ * @return                 The number of inliers if the minimum number of
+ *                         inliers for acceptance was reached; 0 otherwise.
+ */
+
+unsigned rhoHest(Ptr<RHO_HEST> p,       /* Homography estimation context. */
+                 const float*  src,     /* Source points */
+                 const float*  dst,     /* Destination points */
+                 char*         inl,     /* Inlier mask */
+                 unsigned      N,       /*  = src.length = dst.length = inl.length */
+                 float         maxD,    /* Works:     3.0 */
+                 unsigned      maxI,    /* Works:    2000 */
+                 unsigned      rConvg,  /* Works:    2000 */
+                 double        cfd,     /* Works:   0.995 */
+                 unsigned      minInl,  /* Minimum:     4 */
+                 double        beta,    /* Works:    0.35 */
+                 unsigned      flags,   /* Works:       0 */
+                 const float*  guessH,  /* Extrinsic guess, NULL if none provided */
+                 float*        finalH){ /* Final result. */
+    return p->rhoHest(src, dst, inl, N, maxD, maxI, rConvg, cfd, minInl, beta,
+                      flags, guessH, finalH); /* Arguments pass through unchanged to the context's virtual rhoHest(); p is not checked. */
+}
+
+
+
+
+
+
+
+
+
+
+
+
+/*********************** RHO_HEST_REFC implementation **********************/
+
+/**
+ * Constructor for RHO_HEST_REFC.
+ *
+ * Does nothing. True initialization is done by initialize().
+ */
+
+RHO_HEST_REFC::RHO_HEST_REFC()
+    : initialized(0)
+{
+    /* Intentionally empty: the object only records that it is not yet initialized. */
+}
+
+/**
+ * Private copy constructor for RHO_HEST_REFC. Disabled.
+ */
+
+RHO_HEST_REFC::RHO_HEST_REFC(const RHO_HEST_REFC&)
+    : initialized(0)
+{
+    /* Nothing is copied from the argument; the new object starts uninitialized. */
+}
+
+/**
+ * Destructor for RHO_HEST_REFC.
+ */
+
+RHO_HEST_REFC::~RHO_HEST_REFC(){
+    /* Nothing to tear down unless the initialized flag is set. */
+    if(!initialized){
+        return;
+    }
+
+    finalize();
+}
+
+
+
+/**
+ * Initialize the estimator context, by allocating the aligned buffers
+ * internally needed.
+ *
+ * Currently there are 7 fixed-size per-estimator buffers, plus one optional
+ * table:
+ * - The buffer of m indexes representing a sample
+ * - The buffer of 16 floats representing m matches (x,y) -> (X,Y).
+ * - The buffer for the current homography
+ * - The buffer for the best-so-far homography
+ * - The three lm.* buffers (JtJ, tmp1 and Jte)
+ * - Optionally, the non-randomness criterion table
+ *
+ * Returns 0 if unsuccessful and non-0 otherwise.
+ */
+
+inline int    RHO_HEST_REFC::initialize(void){
+    /* Start from "not initialized"; the flag is raised only on success. */
+    initialized = 0;
+
+    /* Carve out the fixed-size per-object buffers. */
+    allocatePerObj();
+
+    /* No per-run inlier masks exist until a run is started. */
+    best.inl    = NULL;
+    best.numInl = 0;
+    curr.inl    = NULL;
+    curr.numInl = 0;
+
+    /* The non-randomness table starts out empty. */
+    nr.beta     = 0.0;
+    nr.size     = 0;
+
+    /* Seed the PRNG with an all-ones 64-bit value. */
+    fastSeed((uint64_t)~0);
+
+    /* Success is judged by the per-object backing store being non-empty. */
+    if(mem.perObj.empty()){
+        finalize();
+        return 0;
+    }
+
+    initialized = 1;
+    return 1;
+}
+
+/**
+ * Finalize.
+ *
+ * Finalize the estimator context, by freeing the aligned buffers used
+ * internally.
+ */
+
+inline void   RHO_HEST_REFC::finalize(void){
+    /* Calling this on a context that is not initialized is a no-op. */
+    if(!initialized){
+        return;
+    }
+
+    /* Release the per-object buffers, then mark the context as torn down. */
+    deallocatePerObj();
+    initialized = 0;
+}
+
+/**
+ * Ensure that the estimator context's internal table for non-randomness
+ * criterion is at least of the given size, and uses the given beta. The table
+ * should be larger than the maximum number of matches fed into the estimator.
+ *
+ * A value of N of 0 requests deallocation of the table.
+ *
+ * @param [in] N     If 0, deallocate internal table. If > 0, ensure that the
+ *                   internal table is of at least this size, reallocating if
+ *                   necessary.
+ * @param [in] beta  The beta-factor to use within the table.
+ * @return 0 if unsuccessful; non-zero otherwise.
+ *
+ * Reads:  nr.*
+ * Writes: nr.*
+ */
+
+inline int    RHO_HEST_REFC::ensureCapacity(unsigned N, double beta){
+    /*
+     * sacInitNonRand() indexes its output pointer with the absolute entry
+     * number n (it stores nonRandMinInl[n] for n in [start, N)). It must
+     * therefore always receive the base of the table. Passing a pointer that
+     * was already advanced by 'start' would make it write past the end of the
+     * table and leave entries [start, N) unfilled.
+     */
+    if(N == 0){
+        /* Clear. */
+        nr.tbl.clear();
+        nr.size = 0;
+    }else if(nr.beta != beta){
+        /* Beta changed. Every entry depends on beta, so redo all the work. */
+        nr.tbl.resize(N);
+        nr.beta = beta;
+        sacInitNonRand(nr.beta, 0, N, &nr.tbl[0]);
+        nr.size = N;
+    }else if(N > nr.size){
+        /* Same beta, larger table: only entries [nr.size, N) are new. */
+        nr.tbl.resize(N);
+        sacInitNonRand(nr.beta, nr.size, N, &nr.tbl[0]);
+        nr.size = N;
+    }else{
+        /* Table already covers N entries for this beta. Do nothing. */
+    }
+
+    /* This function has no failure path; it always reports success. */
+    return 1;
+}
+
+
+/**
+ * Estimates the homography using the given context, matches and parameters to
+ * PROSAC.
+ *
+ * @param [in]     src     The pointer to the source points of the matches.
+ *                             Must be aligned to 4 bytes. Cannot be NULL.
+ * @param [in]     dst     The pointer to the destination points of the matches.
+ *                             Must be aligned to 4 bytes. Cannot be NULL.
+ * @param [out]    inl     The pointer to the output mask of inlier matches.
+ *                             Must be aligned to 4 bytes. May be NULL.
+ * @param [in]     N       The number of matches.
+ * @param [in]     maxD    The maximum distance.
+ * @param [in]     maxI    The maximum number of PROSAC iterations.
+ * @param [in]     rConvg  The RANSAC convergence parameter.
+ * @param [in]     cfd     The required confidence in the solution.
+ * @param [in]     minInl  The minimum required number of inliers.
+ * @param [in]     beta    The beta-parameter for the non-randomness criterion.
+ * @param [in]     flags   A union of flags to control the estimation.
+ * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
+ *                         none provided.
+ * @param [out]    finalH  The final estimation of H, or the zero matrix if
+ *                         the minimum number of inliers was not met.
+ *                         Cannot be NULL.
+ * @return                 The number of inliers if the minimum number of
+ *                         inliers for acceptance was reached; 0 otherwise.
+ */
+
+unsigned RHO_HEST_REFC::rhoHest(const float*   src,     /* Source points */
+                                const float*   dst,     /* Destination points */
+                                char*          inl,     /* Inlier mask */
+                                unsigned       N,       /*  = src.length = dst.length = inl.length */
+                                float          maxD,    /* Works:     3.0 */
+                                unsigned       maxI,    /* Works:    2000 */
+                                unsigned       rConvg,  /* Works:    2000 */
+                                double         cfd,     /* Works:   0.995 */
+                                unsigned       minInl,  /* Minimum:     4 */
+                                double         beta,    /* Works:    0.35 */
+                                unsigned       flags,   /* Works:       0 */
+                                const float*   guessH,  /* Extrinsic guess, NULL if none provided */
+                                float*         finalH){ /* Final result. */
+
+    /* Latch every caller-supplied argument into the per-run arg.* record. */
+    arg.src     = src;
+    arg.dst     = dst;
+    arg.inl     = inl;
+    arg.N       = N;
+    arg.maxD    = maxD;
+    arg.maxI    = maxI;
+    arg.rConvg  = rConvg;
+    arg.cfd     = cfd;
+    arg.minInl  = minInl;
+    arg.beta    = beta;
+    arg.flags   = flags;
+    arg.guessH  = guessH;
+    arg.finalH  = finalH;
+
+    /* Validate the arguments and set up per-run state; bail out on failure. */
+    if(!initRun()){
+        outputZeroH();
+        finiRun();
+        return 0;
+    }
+
+    /* Score the caller's extrinsic guess first, when one was given. */
+    if(haveExtrinsicGuess()){
+        verify();
+    }
+
+    /*
+     * PROSAC loop. Runs while the iteration counter is below arg.maxI or
+     * below 100, so at least 100 iterations are always attempted. A
+     * hypothesis is verified only when hypothesize() did not reject it.
+     */
+    ctrl.i = 0;
+    while(ctrl.i < arg.maxI || ctrl.i < 100){
+        if(hypothesize()){
+            verify();
+        }
+        ctrl.i++;
+    }
+
+    /* Teardown: optional final refinement, then emit the model and clean up. */
+    if(isFinalRefineEnabled() && canRefine()){
+        refine();
+    }
+
+    outputModel();
+    finiRun();
+
+    /* Report the inlier count only when it meets the acceptance minimum. */
+    if(!isBestModelGoodEnough()){
+        return 0;
+    }
+    return best.numInl;
+}
+
+
+/**
+ * Allocate per-object dynamic storage.
+ *
+ * This includes aligned, fixed-size internal buffers, but excludes any buffers
+ * whose size cannot be determined ahead-of-time (before the number of matches
+ * is known).
+ *
+ * All buffer memory is allocated in one single shot, and all pointers are
+ * initialized.
+ */
+
+inline void   RHO_HEST_REFC::allocatePerObj(void){
+    /* Byte sizes of each fixed-size sub-buffer. */
+    const size_t ctrl_smpl_sz   = SMPL_SIZE*sizeof(*ctrl.smpl);
+    const size_t curr_pkdPts_sz = SMPL_SIZE*2*2*sizeof(*curr.pkdPts);
+    const size_t curr_H_sz      = HSIZE;
+    const size_t best_H_sz      = HSIZE;
+    const size_t lm_JtJ_sz      = 8*8*sizeof(float);
+    const size_t lm_tmp1_sz     = 8*8*sizeof(float);
+    const size_t lm_Jte_sz      = 1*8*sizeof(float);
+
+    /*
+     * Lay the sub-buffers out back to back. Each offset is the end of the
+     * previous sub-buffer rounded up to MEM_ALIGN.
+     */
+    const size_t ctrl_smpl_of   = 0;
+    const size_t curr_pkdPts_of = alignSize(ctrl_smpl_of   + ctrl_smpl_sz,   MEM_ALIGN);
+    const size_t curr_H_of      = alignSize(curr_pkdPts_of + curr_pkdPts_sz, MEM_ALIGN);
+    const size_t best_H_of      = alignSize(curr_H_of      + curr_H_sz,      MEM_ALIGN);
+    const size_t lm_JtJ_of      = alignSize(best_H_of      + best_H_sz,      MEM_ALIGN);
+    const size_t lm_tmp1_of     = alignSize(lm_JtJ_of      + lm_JtJ_sz,      MEM_ALIGN);
+    const size_t lm_Jte_of      = alignSize(lm_tmp1_of     + lm_tmp1_sz,     MEM_ALIGN);
+    const size_t total          = alignSize(lm_Jte_of      + lm_Jte_sz,      MEM_ALIGN);
+
+    /*
+     * One cv::Mat of bytes backs everything. MEM_ALIGN extra bytes leave
+     * room for rounding the base pointer up to an aligned address.
+     */
+    mem.perObj.create(1, (int)(total + MEM_ALIGN), CV_8UC1);
+    unsigned char* base = alignPtr(mem.perObj.data, MEM_ALIGN);
+
+    /* Hand out the typed views into the block. */
+    ctrl.smpl   = (unsigned*)  (base + ctrl_smpl_of);
+    curr.pkdPts = (float*)     (base + curr_pkdPts_of);
+    curr.H      = (float*)     (base + curr_H_of);
+    best.H      = (float*)     (base + best_H_of);
+    lm.JtJ      = (float(*)[8])(base + lm_JtJ_of);
+    lm.tmp1     = (float(*)[8])(base + lm_tmp1_of);
+    lm.Jte      = (float*)     (base + lm_Jte_of);
+}
+
+
+/**
+ * Allocate per-run dynamic storage.
+ *
+ * This includes storage that is proportional to the number of points, such as
+ * the inlier mask.
+ */
+
+inline void   RHO_HEST_REFC::allocatePerRun(void){
+    /* Both inlier masks hold one byte per match. */
+    const size_t best_inl_sz = arg.N;
+    const size_t curr_inl_sz = arg.N;
+
+    /* Best mask first, current mask after it, each rounded up to MEM_ALIGN. */
+    const size_t best_inl_of = 0;
+    const size_t curr_inl_of = alignSize(best_inl_of + best_inl_sz, MEM_ALIGN);
+    const size_t total       = alignSize(curr_inl_of + curr_inl_sz, MEM_ALIGN);
+
+    /*
+     * One cv::Mat of bytes backs both masks. MEM_ALIGN extra bytes leave
+     * room for rounding the base pointer up to an aligned address.
+     */
+    mem.perRun.create(1, (int)(total + MEM_ALIGN), CV_8UC1);
+    unsigned char* base = alignPtr(mem.perRun.data, MEM_ALIGN);
+
+    /* Publish the two mask pointers. */
+    best.inl  = (char*)(base + best_inl_of);
+    curr.inl  = (char*)(base + curr_inl_of);
+}
+
+
+/**
+ * Deallocate per-run dynamic storage.
+ *
+ * Undoes the work by allocatePerRun().
+ */
+
+inline void   RHO_HEST_REFC::deallocatePerRun(void){
+    /* Drop both views into the per-run block before releasing it. */
+    curr.inl  = NULL;
+    best.inl  = NULL;
+
+    /* Give the backing cv::Mat storage back. */
+    mem.perRun.release();
+}
+
+
+/**
+ * Deallocate per-object dynamic storage.
+ *
+ * Undoes the work by allocatePerObj().
+ */
+
+inline void   RHO_HEST_REFC::deallocatePerObj(void){
+    /* Drop every typed view into the per-object block... */
+    lm.Jte      = NULL;
+    lm.tmp1     = NULL;
+    lm.JtJ      = NULL;
+    best.H      = NULL;
+    curr.H      = NULL;
+    curr.pkdPts = NULL;
+    ctrl.smpl   = NULL;
+
+    /* ...then give the backing cv::Mat storage back. */
+    mem.perObj.release();
+}
+
+
+/**
+ * Initialize SAC for a run given its arguments.
+ *
+ * Performs sanity-checks and memory allocations. Also initializes the state.
+ *
+ * @returns 0 if per-run initialization failed at any point; non-zero
+ *          otherwise.
+ *
+ * Reads:  arg.*, nr.*
+ * Writes: curr.*, best.*, ctrl.*, eval.*
+ */
+
+inline int    RHO_HEST_REFC::initRun(void){
+    const unsigned minSample = (unsigned)SMPL_SIZE;
+
+    /**
+     * Stage 0: argument sanity checks.
+     *
+     * These are cheap and unambiguous, so they run before anything is
+     * allocated. Any failure aborts the run with a 0 return.
+     */
+
+    /* src and dst must both be non-NULL. */
+    if(!arg.src || !arg.dst){
+        return 0;
+    }
+    /* At least one full minimal sample of matches is required. */
+    if(arg.N < minSample){
+        return 0;
+    }
+    /* The distance threshold may not be negative. */
+    if(arg.maxD < 0){
+        return 0;
+    }
+    /* The confidence must lie in [0, 1]. */
+    if(arg.cfd < 0 || arg.cfd > 1){
+        return 0;
+    }
+    /* minInl is silently raised to the minimal sample size if it is lower. */
+    if(arg.minInl < minSample){
+        arg.minInl = SMPL_SIZE;
+    }
+    /* beta only matters when NR is enabled; then it must lie in (0, 1). */
+    if(isNREnabled() && (arg.beta <= 0 || arg.beta >= 1)){
+        return 0;
+    }
+    /* The output homography buffer is mandatory. */
+    if(!arg.finalH){
+        return 0;
+    }
+
+    /**
+     * Stage 1: optional non-randomness table.
+     *
+     * Depends only on the flags, the nr.* substruct and the already-checked
+     * N and beta arguments.
+     */
+
+    if(isNREnabled() && !ensureCapacity(arg.N, arg.beta)){
+        return 0;
+    }
+
+    /**
+     * Stage 2: allocate and clear the best and current inlier masks.
+     */
+
+    allocatePerRun();
+
+    memset(best.inl, 0, arg.N);
+    memset(curr.inl, 0, arg.N);
+
+    /**
+     * Stage 3: reset the scalar per-run state.
+     */
+
+    /* PROSAC control block. */
+    ctrl.i            = 0;
+    ctrl.phNum        = SMPL_SIZE;
+    ctrl.phEndI       = 1;
+    ctrl.phEndFpI     = sacInitPEndFpI(arg.rConvg, arg.N, SMPL_SIZE);
+    ctrl.phMax        = arg.N;
+    ctrl.phNumInl     = 0;
+    ctrl.numModels    = 0;
+
+    /* The current model starts as the extrinsic guess if given, else zeros. */
+    if(!haveExtrinsicGuess()){
+        memset(curr.H, 0, HSIZE);
+    }else{
+        memcpy(curr.H, arg.guessH, HSIZE);
+    }
+    curr.numInl       = 0;
+
+    /* The best model starts as all zeros with no inliers. */
+    memset(best.H, 0, HSIZE);
+    best.numInl       = 0;
+
+    /* Reset SPRT to its default parameters and derive its thresholds. */
+    eval.Ntested      = 0;
+    eval.Ntestedtotal = 0;
+    eval.good         = 1;
+    eval.t_M          = SPRT_T_M;
+    eval.m_S          = SPRT_M_S;
+    eval.epsilon      = SPRT_EPSILON;
+    eval.delta        = SPRT_DELTA;
+    designSPRTTest();
+
+    return 1;
+}
+
+/**
+ * Finalize SAC run.
+ *
+ * Deallocates per-run allocatable resources. Currently this consists only of
+ * the best and current inlier masks, which are equal in size to p->arg.N
+ * bytes.
+ *
+ * Reads:  arg.bestInl, curr.inl, best.inl
+ * Writes: curr.inl, best.inl
+ */
+
+inline void   RHO_HEST_REFC::finiRun(void)
+{
+    /* The only per-run teardown is releasing the per-run storage. */
+    deallocatePerRun();
+}
+
+/**
+ * Hypothesize a model.
+ *
+ * Selects randomly a sample (within the rules of PROSAC) and generates a
+ * new current model, and applies degeneracy tests to it.
+ *
+ * @returns 0 if hypothesized model could be rejected early as degenerate, and
+ * non-zero otherwise.
+ */
+
+inline int    RHO_HEST_REFC::hypothesize(void){
+    /* Advance the PROSAC phase first if the current one has run its course. */
+    if(PROSACPhaseEndReached()){
+        PROSACGoToNextPhase();
+    }
+
+    /* Draw a sample; a model is generated only from a non-degenerate one. */
+    getPROSACSample();
+    if(!isSampleDegenerate()){
+        generateModel();
+
+        /* The hypothesis survives only if the model is also non-degenerate. */
+        if(!isModelDegenerate()){
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Verify the hypothesized model.
+ *
+ * Given the current model, evaluate its quality. If it is better than
+ * everything before, save as new best model (and possibly refine it).
+ *
+ * Returns 1.
+ */
+
+inline int    RHO_HEST_REFC::verify(void){
+    /* Score the current model, then let SPRT adapt to the outcome. */
+    evaluateModelSPRT();
+    updateSPRT();
+
+    /* Nothing more to do unless the current model beats the best so far. */
+    if(!isBestModel()){
+        return 1;
+    }
+
+    saveBestModel();
+
+    /* Optional refinement of the new best model. */
+    if(isRefineEnabled() && canRefine()){
+        refine();
+    }
+
+    /* Recompute the iteration bound for the new best model. */
+    updateBounds();
+
+    /* Optional non-randomness-aware stopping criterion. */
+    if(isNREnabled()){
+        nStarOptimize();
+    }
+
+    return 1;
+}
+
+/**
+ * Check whether extrinsic guess was provided or not.
+ *
+ * @return Zero if no extrinsic guess was provided; non-zero otherwise.
+ */
+
+inline int    RHO_HEST_REFC::haveExtrinsicGuess(void){
+    /* A guess counts as "provided" exactly when the pointer is non-NULL. */
+    if(arg.guessH){
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * Check whether non-randomness criterion is enabled.
+ *
+ * @return Zero if non-randomness criterion disabled; non-zero if not.
+ */
+
+inline int    RHO_HEST_REFC::isNREnabled(void){
+    /* The raw masked bit is returned, so "enabled" is any non-zero value. */
+    int nrBit = arg.flags & RHO_FLAG_ENABLE_NR;
+    return nrBit;
+}
+
+/**
+ * Check whether best-model-so-far refinement is enabled.
+ *
+ * @return Zero if best-model-so-far refinement disabled; non-zero if not.
+ */
+
+inline int    RHO_HEST_REFC::isRefineEnabled(void){
+    /* The raw masked bit is returned, so "enabled" is any non-zero value. */
+    int refineBit = arg.flags & RHO_FLAG_ENABLE_REFINEMENT;
+    return refineBit;
+}
+
+/**
+ * Check whether final-model refinement is enabled.
+ *
+ * @return Zero if final-model refinement disabled; non-zero if not.
+ */
+
+inline int    RHO_HEST_REFC::isFinalRefineEnabled(void){
+    /* The raw masked bit is returned, so "enabled" is any non-zero value. */
+    int finalRefineBit = arg.flags & RHO_FLAG_ENABLE_FINAL_REFINEMENT;
+    return finalRefineBit;
+}
+
+/**
+ * Computes whether the end of the current PROSAC phase has been reached. At
+ * PROSAC phase phNum, only matches [0, phNum) are sampled from.
+ *
+ * Reads    (direct): ctrl.i, ctrl.phEndI, ctrl.phNum, ctrl.phMax
+ * Reads   (callees): None.
+ * Writes   (direct): None.
+ * Writes  (callees): None.
+ */
+
+inline int    RHO_HEST_REFC::PROSACPhaseEndReached(void){
+    /* The phase is still running while its end iteration is not reached. */
+    if(ctrl.i < ctrl.phEndI){
+        return 0;
+    }
+
+    /* Once phNum has reached phMax there is no further phase to move to. */
+    return ctrl.phNum < ctrl.phMax;
+}
+
+/**
+ * Updates unconditionally the necessary fields to move to the next PROSAC
+ * stage.
+ *
+ * Not idempotent.
+ *
+ * Reads    (direct): ctrl.phNum, ctrl.phEndFpI, ctrl.phEndI
+ * Reads   (callees): None.
+ * Writes   (direct): ctrl.phNum, ctrl.phEndFpI, ctrl.phEndI
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::PROSACGoToNextPhase(void){
+    /* Grow the sampling pool by one match. */
+    ctrl.phNum += 1;
+
+    /* New fractional phase end, scaled from the previous one. */
+    const double nextEndFp = (ctrl.phEndFpI * ctrl.phNum)/(ctrl.phNum - SMPL_SIZE);
+
+    /* The integer phase end advances by the rounded-up growth. */
+    ctrl.phEndI  += (unsigned)ceil(nextEndFp - ctrl.phEndFpI);
+    ctrl.phEndFpI = nextEndFp;
+}
+
+/**
+ * Get a sample according to PROSAC rules. Namely:
+ * - If we're past the phase end iteration, select randomly 4 out of the first
+ *   phNum matches.
+ * - Otherwise, select match phNum-1 and select randomly the 3 others out of
+ *   the first phNum-1 matches.
+ *
+ * Reads    (direct): ctrl.i, ctrl.phEndI, ctrl.phNum
+ * Reads   (callees): prng.s
+ * Writes   (direct): ctrl.smpl
+ * Writes  (callees): prng.s
+ */
+
+inline void   RHO_HEST_REFC::getPROSACSample(void){
+    if(ctrl.i <= ctrl.phEndI){
+        /*
+         * Still within the phase: match phNum-1 is forced into slot 3 and
+         * the other three are drawn from the matches before it.
+         */
+        const unsigned newest = ctrl.phNum-1;
+
+        rndSmpl(3, ctrl.smpl, newest);
+        ctrl.smpl[3] = newest;
+    }else{
+        /* FIXME: Dubious. Review. */
+        /* Past the phase end: draw all four from the first phNum matches. */
+        rndSmpl(4, ctrl.smpl, ctrl.phNum);/* Used to be phMax */
+    }
+}
+
+/**
+ * Choose, without repetition, sampleSize integers in the range [0, numDataPoints).
+ *
+ * Reads    (direct): None.
+ * Reads   (callees): prng.s
+ * Writes   (direct): None.
+ * Writes  (callees): prng.s
+ */
+
+inline void   RHO_HEST_REFC::rndSmpl(unsigned  sampleSize,
+                                     unsigned* currentSample,
+                                     unsigned  dataSetSize){
+    /**
+     * Two strategies, chosen by how large the sample is relative to the
+     * data set:
+     * - more than half of the data set: selection sampling;
+     * - otherwise: rejection sampling, redrawing on duplicates.
+     */
+
+    if(sampleSize*2 <= dataSetSize){
+        /**
+         * Rejection sampling. For each slot, keep drawing random indexes
+         * until one is found that no earlier slot already holds.
+         */
+
+        for(unsigned slot=0; slot<sampleSize; slot++){
+            bool duplicate;
+
+            do{
+                const unsigned candidate = (unsigned)(dataSetSize*fastRandom());
+                currentSample[slot] = candidate;
+
+                duplicate = false;
+                for(unsigned prev=0; prev<slot && !duplicate; prev++){
+                    duplicate = (currentSample[prev] == candidate);
+                }
+            }while(duplicate);
+        }
+    }else{
+        /**
+         * Selection sampling (Knuth's Algorithm S).
+         *
+         * Walk the records in order. With 'seen' records visited and
+         * 'picked' of them selected so far, the next record is taken when
+         *     (dataSetSize - seen) * U < sampleSize - picked
+         * for a fresh random U. The walk stops once sampleSize records
+         * have been picked, so the result is in increasing order.
+         */
+
+        unsigned picked = 0;
+        unsigned seen   = 0;
+
+        while(picked < sampleSize){
+            double U = fastRandom();
+            if((dataSetSize-seen)*U < (sampleSize-picked)){
+                currentSample[picked++] = seen;
+            }
+            seen++;
+        }
+    }
+}
+
+/**
+ * Checks whether the *sample* is degenerate prior to model generation.
+ * - First, the extremely cheap numerical degeneracy test is run, which weeds
+ *   out bad samples to the optimized GE implementation.
+ * - Second, the geometrical degeneracy test is run, which weeds out most other
+ *   bad samples.
+ *
+ * Reads    (direct): ctrl.smpl, arg.src, arg.dst
+ * Reads   (callees): None.
+ * Writes   (direct): curr.pkdPts
+ * Writes  (callees): None.
+ */
+
+inline int    RHO_HEST_REFC::isSampleDegenerate(void){
+    typedef struct{float x,y;} MyPt2f;
+    MyPt2f* pkdPts = (MyPt2f*)curr.pkdPts;
+    MyPt2f* src    = (MyPt2f*)arg.src;
+    MyPt2f* dst    = (MyPt2f*)arg.dst;
+    const unsigned idx[4] = {ctrl.smpl[0], ctrl.smpl[1], ctrl.smpl[2], ctrl.smpl[3]};
+    unsigned a, b;
+
+    /**
+     * Gather the four sampled matches into the packed buffer:
+     *     pkdPts[0..3] = source points of matches 0..3
+     *     pkdPts[4..7] = destination points of matches 0..3
+     * Each MyPt2f is an (x, y) pair of floats.
+     */
+
+    for(a=0; a<4; a++){
+        pkdPts[a]   = src[idx[a]];
+    }
+    for(a=0; a<4; a++){
+        pkdPts[a+4] = dst[idx[a]];
+    }
+
+    /**
+     * Cheap numerical test: reject if any two of the four source points
+     * share an x coordinate or share a y coordinate.
+     */
+
+    for(a=0; a<4; a++){
+        for(b=a+1; b<4; b++){
+            if(pkdPts[a].x == pkdPts[b].x || pkdPts[a].y == pkdPts[b].y){
+                return 1;
+            }
+        }
+    }
+
+    /**
+     * Geometric test: a point's signed side of a line through two other
+     * points is compared between source and destination, and the sample is
+     * rejected on a sign mismatch.
+     *
+     * NOTE(review): the comparison converts both values to int before the
+     * XOR. Magnitudes below 1 therefore truncate to 0 and never trigger a
+     * rejection. Confirm whether that tolerance is intended.
+     */
+
+    /* Line through points 0 and 1, as coefficients (A, B, C). */
+    float s01A = pkdPts[0].y-pkdPts[1].y;
+    float s01B = pkdPts[1].x-pkdPts[0].x;
+    float s01C = pkdPts[0].x*pkdPts[1].y-pkdPts[0].y*pkdPts[1].x;
+    float d01A = pkdPts[4].y-pkdPts[5].y;
+    float d01B = pkdPts[5].x-pkdPts[4].x;
+    float d01C = pkdPts[4].x*pkdPts[5].y-pkdPts[4].y*pkdPts[5].x;
+    float sideS, sideD;
+
+    /* (0 x 1) * 2 */
+    sideS = s01A*pkdPts[2].x + s01B*pkdPts[2].y + s01C;
+    sideD = d01A*pkdPts[6].x + d01B*pkdPts[6].y + d01C;
+    if(((int)sideS^(int)sideD) < 0){
+        return 1;
+    }
+
+    /* (0 x 1) * 3 */
+    sideS = s01A*pkdPts[3].x + s01B*pkdPts[3].y + s01C;
+    sideD = d01A*pkdPts[7].x + d01B*pkdPts[7].y + d01C;
+    if(((int)sideS^(int)sideD) < 0){
+        return 1;
+    }
+
+    /* Line through points 2 and 3, as coefficients (A, B, C). */
+    float s23A = pkdPts[2].y-pkdPts[3].y;
+    float s23B = pkdPts[3].x-pkdPts[2].x;
+    float s23C = pkdPts[2].x*pkdPts[3].y-pkdPts[2].y*pkdPts[3].x;
+    float d23A = pkdPts[6].y-pkdPts[7].y;
+    float d23B = pkdPts[7].x-pkdPts[6].x;
+    float d23C = pkdPts[6].x*pkdPts[7].y-pkdPts[6].y*pkdPts[7].x;
+
+    /* (2 x 3) * 0 */
+    sideS = s23A*pkdPts[0].x + s23B*pkdPts[0].y + s23C;
+    sideD = d23A*pkdPts[4].x + d23B*pkdPts[4].y + d23C;
+    if(((int)sideS^(int)sideD) < 0){
+        return 1;
+    }
+
+    /* (2 x 3) * 1 */
+    sideS = s23A*pkdPts[1].x + s23B*pkdPts[1].y + s23C;
+    sideD = d23A*pkdPts[5].x + d23B*pkdPts[5].y + d23C;
+    if(((int)sideS^(int)sideD) < 0){
+        return 1;
+    }
+
+    /* Every test passed: the sample is usable. */
+    return 0;
+}
+
+/**
+ * Compute homography of matches in gathered, packed sample and output the
+ * current homography.
+ *
+ * Reads    (direct): None.
+ * Reads   (callees): curr.pkdPts
+ * Writes   (direct): None.
+ * Writes  (callees): curr.H
+ */
+
+inline void   RHO_HEST_REFC::generateModel(void)
+{
+    /* Delegate to hFuncRefC: packed sample points in, current homography out. */
+    hFuncRefC(curr.pkdPts, curr.H);
+}
+
+/**
+ * Checks whether the model is itself degenerate.
+ * - One test: All elements of the homography are added, and if the result is
+ *   NaN the homography is rejected.
+ *
+ * Reads    (direct): curr.H
+ * Reads   (callees): None.
+ * Writes   (direct): None.
+ * Writes  (callees): None.
+ */
+
+inline int    RHO_HEST_REFC::isModelDegenerate(void){
+    const float* H = curr.H;
+    unsigned     k;
+
+    /* Sum H[0..7] left to right; any NaN among them makes the sum NaN. */
+    float sum = H[0];
+    for(k=1; k<8; k++){
+        sum = sum + H[k];
+    }
+
+    /* The model is rejected exactly when that sum is NaN. */
+    return cvIsNaN(sum);
+}
+
+/**
+ * Evaluates the current model using SPRT for early exiting.
+ *
+ * Reads    (direct): arg.maxD, arg.src, arg.dst, arg.N, curr.inl, curr.H,
+ *                    ctrl.numModels, eval.Ntestedtotal, eval.lambdaAccept,
+ *                    eval.lambdaReject, eval.A
+ * Reads   (callees): None.
+ * Writes   (direct): ctrl.numModels, curr.numInl, eval.Ntested, eval.good,
+ *                    eval.Ntestedtotal
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::evaluateModelSPRT(void){
+    const float* const srcPts    = arg.src;
+    const float* const dstPts    = arg.dst;
+    const float* const H         = curr.H;
+    char* const        mask      = curr.inl;
+    const float        maxDistSq = arg.maxD*arg.maxD;
+    double             lambda    = 1.0;
+    unsigned           i;
+
+    /* One more model is being scored; reset the per-model counters. */
+    ctrl.numModels++;
+
+    curr.numInl   = 0;
+    eval.Ntested  = 0;
+    eval.good     = 1;
+
+    /* Scalar loop over the matches; stops early once SPRT rejects. */
+    for(i=0; i<arg.N && eval.good; i++){
+        /* Match i: source (x, y) and destination (X, Y). */
+        float x=srcPts[i*2],y=srcPts[i*2+1];
+        float X=dstPts[i*2],Y=dstPts[i*2+1];
+
+        /* Project the source point through H, whose H_33 is taken as 1. */
+        float reprojX=H[0]*x+H[1]*y+H[2];
+        float reprojY=H[3]*x+H[4]*y+H[5];
+        float reprojZ=H[6]*x+H[7]*y+1.0f;
+
+        /* Homogeneous -> regular coordinates. */
+        reprojX/=reprojZ;
+        reprojY/=reprojZ;
+
+        /* Squared distance to the destination point. */
+        reprojX-=X;
+        reprojY-=Y;
+        reprojX*=reprojX;
+        reprojY*=reprojY;
+        float reprojDist = reprojX+reprojY;
+
+        /* Classify the match and record it in the current inlier mask. */
+        unsigned isInlier = reprojDist <= maxDistSq;
+        curr.numInl += isInlier;
+        mask[i]      = (char)isInlier;
+
+        /* SPRT: update the likelihood ratio; exceeding A rejects the model. */
+        if(isInlier){
+            lambda *= eval.lambdaAccept;
+        }else{
+            lambda *= eval.lambdaReject;
+        }
+        eval.good = lambda <= eval.A;
+    }
+
+    /* i is the number of matches that were actually tested. */
+    eval.Ntested       = i;
+    eval.Ntestedtotal += i;
+}
+
+/**
+ * Update either the delta or epsilon SPRT parameters, depending on the events
+ * that transpired in the previous evaluation.
+ *
+ * Reads    (direct): eval.good, curr.numInl, arg.N, eval.Ntested, eval.delta
+ * Reads   (callees): eval.delta, eval.epsilon, eval.t_M, eval.m_S
+ * Writes   (direct): eval.epsilon, eval.delta
+ * Writes  (callees): eval.A, eval.lambdaReject, eval.lambdaAccept
+ */
+
+inline void   RHO_HEST_REFC::updateSPRT(void){
+    if(eval.good){
+        /* Accepted model: epsilon changes only when it is a new best. */
+        if(!isBestModel()){
+            return;
+        }
+
+        eval.epsilon = (double)curr.numInl/arg.N;
+        designSPRTTest();
+        return;
+    }
+
+    /* Rejected model: re-estimate delta from the matches tested so far. */
+    const double newDelta = (double)curr.numInl/eval.Ntested;
+    if(!(newDelta > 0)){
+        return;
+    }
+
+    /* Redesign the test only when delta moved by more than MIN_DELTA_CHNG. */
+    const double relChange = fabs(eval.delta - newDelta)/ eval.delta;
+    if(relChange > MIN_DELTA_CHNG){
+        eval.delta = newDelta;
+        designSPRTTest();
+    }
+}
+
+/**
+ * Numerically compute threshold A from the estimated delta, epsilon, t_M and
+ * m_S values.
+ *
+ * Epsilon:  Denotes the probability that a randomly chosen data point is
+ *           consistent with a good model.
+ * Delta:    Denotes the probability that a randomly chosen data point is
+ *           consistent with a bad model.
+ * t_M:      Time needed to instantiate a model hypotheses given a sample.
+ *           (Computing model parameters from a sample takes the same time
+ *            as verification of t_M data points)
+ * m_S:      The number of models that are verified per sample.
+ */
+
+static inline double sacDesignSPRTTest(double delta, double epsilon, double t_M, double m_S){
+    /**
+     * Randomized RANSAC with Sequential Probability Ratio Test, ICCV 2005
+     * Eq (2): C is the sum of a "reject" term and an "accept" term.
+     */
+
+    const double rejectTerm = (1-delta)  *  log((1-delta)/(1-epsilon));
+    const double acceptTerm = delta      *  log(  delta  /  epsilon  );
+    const double C          = rejectTerm + acceptTerm;
+
+    /**
+     * Same paper, Eq (6):
+     *     K = K_1/K_2 + 1 = (t_M*C)/m_S + 1
+     */
+
+    const double K = t_M*C/m_S + 1;
+
+    /**
+     * Same paper, paragraph below Eq (6): A* is the limit of
+     *     A_0     = K
+     *     A_{n+1} = K + log(A_n)
+     * The iteration stops as soon as a step grows A by no more than 1.5e-8,
+     * and after ten steps at the latest.
+     */
+
+    double   An    = K;
+    unsigned steps = 0;
+
+    for(;;){
+        const double prevAn = An;
+        An = K + log(prevAn);
+
+        if(!(An-prevAn > 1.5e-8)){
+            break;
+        }
+        if(++steps >= 10){
+            break;
+        }
+    }
+
+    return An;
+}
+
+/**
+ * Design the SPRT test. Shorthand for
+ *     A = sprt(delta, epsilon, t_M, m_S);
+ *
+ * Idempotent.
+ *
+ * Reads    (direct): eval.delta, eval.epsilon, eval.t_M, eval.m_S
+ * Reads   (callees): None.
+ * Writes   (direct): eval.A, eval.lambdaReject, eval.lambdaAccept.
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::designSPRTTest(void){
+    const double delta   = eval.delta;
+    const double epsilon = eval.epsilon;
+
+    /* Likelihood-ratio factors applied per tested match. */
+    eval.lambdaAccept = ((   delta   ) / (    epsilon  ));
+    eval.lambdaReject = ((1.0 - delta) / (1.0 - epsilon));
+
+    /* Decision threshold for the accumulated likelihood ratio. */
+    eval.A = sacDesignSPRTTest(delta, epsilon, eval.t_M, eval.m_S);
+}
+
+/**
+ * Return whether the current model is the best model so far.
+ *
+ * @return Non-zero if this is the model with the most inliers seen so far;
+ *         0 otherwise.
+ *
+ * Reads    (direct): curr.numInl, best.numInl
+ * Reads   (callees): None.
+ * Writes   (direct): None.
+ * Writes  (callees): None.
+ */
+
+inline int    RHO_HEST_REFC::isBestModel(void){
+    /* Strictly more inliers is required; a tie keeps the existing best. */
+    return best.numInl < curr.numInl;
+}
+
+/**
+ * Returns whether the current-best model is good enough to be an
+ * acceptable best model, by checking whether it meets the minimum
+ * number of inliers.
+ *
+ * @return Non-zero if the current model is "good enough" to save;
+ *         0 otherwise.
+ *
+ * Reads    (direct): best.numInl, arg.minInl
+ * Reads   (callees): None.
+ * Writes   (direct): None.
+ * Writes  (callees): None.
+ */
+
+inline int    RHO_HEST_REFC::isBestModelGoodEnough(void){
+    /* Acceptable once the best inlier count reaches the requested minimum. */
+    return arg.minInl <= best.numInl;
+}
+
+/**
+ * Make current model new best model by swapping the homography, inlier mask
+ * and count of inliers between the current and best models.
+ *
+ * Reads    (direct): curr.H, curr.inl, curr.numInl,
+ *                    best.H, best.inl, best.numInl
+ * Reads   (callees): None.
+ * Writes   (direct): curr.H, curr.inl, curr.numInl,
+ *                    best.H, best.inl, best.numInl
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::saveBestModel(void){
+    /* Swap the homography buffers (pointers only, no data is copied). */
+    float* const   prevBestH      = best.H;
+    best.H                        = curr.H;
+    curr.H                        = prevBestH;
+
+    /* Swap the inlier masks (pointers only). */
+    char* const    prevBestInl    = best.inl;
+    best.inl                      = curr.inl;
+    curr.inl                      = prevBestInl;
+
+    /* Swap the inlier counts. */
+    const unsigned prevBestNumInl = best.numInl;
+    best.numInl                   = curr.numInl;
+    curr.numInl                   = prevBestNumInl;
+}
+
+/**
+ * Compute NR table entries [start, N) for given beta.
+ */
+
+static inline void   sacInitNonRand(double    beta,
+                                    unsigned  start,
+                                    unsigned  N,
+                                    unsigned* nonRandMinInl){
+    /* Loop-invariant factor of the sigma term. */
+    const double sigmaScale = sqrt(beta*(1.0-beta)) * CHI_SQ;
+
+    /* Entries below SMPL_SIZE+1 are never written. */
+    unsigned first = start;
+    if(SMPL_SIZE+1 > start){
+        first = SMPL_SIZE+1;
+    }
+
+    /*
+     * nonRandMinInl is indexed by the absolute entry number n, so it must
+     * point at entry 0 of the table regardless of 'start'.
+     */
+    for(unsigned n = first; n < N; n++){
+        const double mu    = n * beta;
+        const double sigma = sqrt((double)n)* sigmaScale;
+
+        nonRandMinInl[n] = (unsigned)ceil(SMPL_SIZE + mu + sigma);
+    }
+}
+
+/**
+ * Optimize the stopping criterion to account for the non-randomness criterion
+ * of PROSAC.
+ *
+ * Reads    (direct): arg.N, best.numInl, nr.tbl, arg.inl, ctrl.phMax,
+ *                    ctrl.phNumInl, arg.cfd, arg.maxI
+ * Reads   (callees): None.
+ * Writes   (direct): arg.maxI, ctrl.phMax, ctrl.phNumInl
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::nStarOptimize(void){
+    const unsigned minLen  = 10*2; /*(N * INLIERS_RATIO) */
+    unsigned       nStar   = arg.N;       /* Best prefix length found so far. */
+    unsigned       inlStar = best.numInl; /* Inliers within the first nStar.  */
+    unsigned       n       = nStar;       /* Candidate prefix length.         */
+    unsigned       inl     = inlStar;     /* Inliers within the first n.      */
+
+    /* Shrink the prefix one match at a time, keeping any denser prefix. */
+    while(n > minLen && inl){
+        if(inl*nStar > inlStar*n){
+            if(inl < nr.tbl[n]){break;}/* Fewer inliers than nr.tbl[n] demands. */
+            nStar   = n;
+            inlStar = inl;
+        }
+        inl -= best.inl[n-1] ? 1 : 0;
+        n--;
+    }
+
+    /* Adopt the prefix only if its inlier ratio beats the stored phase's. */
+    if(inlStar*ctrl.phMax > ctrl.phNumInl*nStar){
+        ctrl.phMax    = nStar;
+        ctrl.phNumInl = inlStar;
+        arg.maxI      = sacCalcIterBound(arg.cfd,
+                                         (double)ctrl.phNumInl/ctrl.phMax,
+                                         SMPL_SIZE, arg.maxI);
+    }
+}
+
+/**
+ * Classic RANSAC iteration bound based on largest # of inliers.
+ *
+ * Reads    (direct): arg.maxI, arg.cfd, best.numInl, arg.N
+ * Reads   (callees): None.
+ * Writes   (direct): arg.maxI
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::updateBounds(void){
+    /* Inlier rate over the whole data set. The bound can only shrink, */
+    /* because the previous arg.maxI is handed in as the clamp.        */
+    const double inlierRate = (double)best.numInl/arg.N;
+    arg.maxI = sacCalcIterBound(arg.cfd, inlierRate, SMPL_SIZE, arg.maxI);
+}
+
+/**
+ * Output the best model so far to the output argument.
+ *
+ * Reads    (direct): arg.finalH, best.H, arg.inl, best.inl, arg.N
+ * Reads   (callees): arg.finalH, arg.inl, arg.N
+ * Writes   (direct): arg.finalH, arg.inl
+ * Writes  (callees): arg.finalH, arg.inl
+ */
+
+inline void   RHO_HEST_REFC::outputModel(void){
+    if(!isBestModelGoodEnough()){
+        outputZeroH();/* Too few inliers: emit an all-zero H and mask. */
+    }else{
+        /* Each output buffer is optional; skip whichever is NULL, the  */
+        /* same way outputZeroH() does, instead of memcpy()ing into it. */
+        if(arg.finalH){memcpy(arg.finalH, best.H,   HSIZE);}
+        if(arg.inl)   {memcpy(arg.inl,    best.inl, arg.N);}
+    }
+}
+
+/**
+ * Output a zeroed H to the output argument.
+ *
+ * Reads    (direct): arg.finalH, arg.inl, arg.N
+ * Reads   (callees): None.
+ * Writes   (direct): arg.finalH, arg.inl
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::outputZeroH(void){
+    /* Both outputs are optional: a NULL buffer is skipped, a supplied */
+    /* one is cleared byte-wise (HSIZE bytes of H, arg.N mask bytes).  */
+    if(arg.finalH != NULL){
+        memset(arg.finalH, 0, HSIZE);
+    }
+    if(arg.inl != NULL){memset(arg.inl, 0, arg.N);}
+}
+
+/**
+ * Compute the real-valued number of samples per phase, given the RANSAC convergence speed,
+ * data set size and sample size.
+ */
+
+static inline double sacInitPEndFpI(const unsigned ransacConvg,
+                                    const unsigned n,
+                                    const unsigned s){
+    /* Falling products s*(s-1)*...*1 and n*(n-1)*...*(n-s+1), built up */
+    /* in double precision one factor at a time.                        */
+    double sFalling = 1, nFalling = 1;
+
+    for(unsigned k=0; k!=s; ++k){
+        sFalling *= s-k;
+        nFalling *= n-k;
+    }
+    return ransacConvg*sFalling/nFalling;
+}
+
+/**
+ * Estimate the number of iterations required based on the requested confidence,
+ * proportion of inliers in the best model so far and sample size.
+ *
+ * Clamp return value at maxIterationBound.
+ */
+
+static inline unsigned sacCalcIterBound(double   confidence,
+                                        double   inlierRate,
+                                        unsigned sampleSize,
+                                        unsigned maxIterBound){
+    double numIters;/* Kept as a double until it has been range-checked. */
+
+    /**
+     * Formula chosen from http://en.wikipedia.org/wiki/RANSAC#The_parameters :
+     *
+     * \[ k = \frac{\log{(1-confidence)}}{\log{(1-inlierRate**sampleSize)}} \]
+     */
+
+    double atLeastOneOutlierProbability = 1.-pow(inlierRate, (double)sampleSize);
+
+    /**
+     * There are two special cases: When argument to log() is 0 and when it is 1.
+     * Each has a special meaning.
+     */
+
+    if(atLeastOneOutlierProbability>=1.){
+        /**
+         * A certainty of picking at least one outlier means that we will need
+         * an infinite amount of iterations in order to find a correct solution.
+         */
+
+        return maxIterBound;
+    }else if(atLeastOneOutlierProbability<=0.){
+        /**
+         * The certainty of NOT picking an outlier means that only 1 iteration
+         * is needed to find a solution (still subject to the clamp).
+         */
+
+        return maxIterBound < 1 ? maxIterBound : 1;
+    }
+
+    /**
+     * Since 1-confidence (the probability of the model being based on at
+     * least one outlier in the data) is equal to
+     * (1-inlierRate**sampleSize)**numIterations (the probability of picking
+     * at least one outlier in numIterations samples), we can isolate
+     * numIterations. The quotient can be enormous (tiny inlierRate), +inf
+     * (confidence == 1) or NaN, and converting such a double to unsigned is
+     * undefined behaviour, so clamp to [0, maxIterBound] BEFORE the cast.
+     * The negated comparison also routes NaN to maxIterBound.
+     */
+
+    numIters = ceil(log(1.-confidence)/log(atLeastOneOutlierProbability));
+    if(!(numIters < (double)maxIterBound)){return maxIterBound;}
+
+    return numIters > 0. ? (unsigned)numIters : 0;
+}
+
+
+/**
+ * Given 4 matches, computes the homography that relates them using Gaussian
+ * Elimination. The row operations are as given in the paper.
+ *
+ * TODO: Clean this up. The code is hideous, and might even conceal sign bugs
+ *       (specifically relating to whether the last column should be negated,
+ *        or not).
+ */
+
+static void hFuncRefC(float* packedPoints,/* Source (four x,y float coordinates) points followed by
+                                             destination (four x,y float coordinates) points, aligned by 32 bytes */
+                      float* H){          /* Homography (three 16-byte aligned rows of 3 floats) */
+    float x0=*packedPoints++;
+    float y0=*packedPoints++;
+    float x1=*packedPoints++;
+    float y1=*packedPoints++;
+    float x2=*packedPoints++;
+    float y2=*packedPoints++;
+    float x3=*packedPoints++;
+    float y3=*packedPoints++;
+    float X0=*packedPoints++;
+    float Y0=*packedPoints++;
+    float X1=*packedPoints++;
+    float Y1=*packedPoints++;
+    float X2=*packedPoints++;
+    float Y2=*packedPoints++;
+    float X3=*packedPoints++;
+    float Y3=*packedPoints++;
+
+    float x0X0=x0*X0, x1X1=x1*X1, x2X2=x2*X2, x3X3=x3*X3;
+    float x0Y0=x0*Y0, x1Y1=x1*Y1, x2Y2=x2*Y2, x3Y3=x3*Y3;
+    float y0X0=y0*X0, y1X1=y1*X1, y2X2=y2*X2, y3X3=y3*X3;
+    float y0Y0=y0*Y0, y1Y1=y1*Y1, y2Y2=y2*Y2, y3Y3=y3*Y3;
+
+    /* The source*destination products above become entries of major[][] below. */
+    /**
+     *  [0]   [1] Hidden   Prec
+     *  x0    y0    1       x1
+     *  x1    y1    1       x1
+     *  x2    y2    1       x1
+     *  x3    y3    1       x1
+     *
+     * Eliminate ones in column 2 and 5.
+     * R(0)-=R(2)
+     * R(1)-=R(2)
+     * R(3)-=R(2)
+     *
+     *  [0]   [1] Hidden   Prec
+     * x0-x2 y0-y2  0       x1+1
+     * x1-x2 y1-y2  0       x1+1
+     *  x2    y2    1       x1
+     * x3-x2 y3-y2  0       x1+1
+     *
+     * Eliminate column 0 of rows 1 and 3
+     * R(1)=(x0-x2)*R(1)-(x1-x2)*R(0),     y1'=(y1-y2)(x0-x2)-(x1-x2)(y0-y2)
+     * R(3)=(x0-x2)*R(3)-(x3-x2)*R(0),     y3'=(y3-y2)(x0-x2)-(x3-x2)(y0-y2)
+     *
+     *  [0]   [1] Hidden   Prec
+     * x0-x2 y0-y2  0      x1+1
+     *   0    y1'   0      x2+3
+     *  x2    y2    1       x1
+     *   0    y3'   0      x2+3
+     *
+     * Eliminate column 1 of rows 0 and 3
+     * R(3)=y1'*R(3)-y3'*R(1)
+     * R(0)=y1'*R(0)-(y0-y2)*R(1)
+     *
+     *  [0]   [1] Hidden   Prec
+     *  x0'    0    0      x3+5
+     *   0    y1'   0      x2+3
+     *  x2    y2    1       x1
+     *   0     0    0      x4+7
+     *
+     * Eliminate columns 0 and 1 of row 2
+     * R(0)/=x0'
+     * R(1)/=y1'
+     * R(2)-= (x2*R(0) + y2*R(1))
+     *
+     *  [0]   [1] Hidden   Prec
+     *   1     0    0      x6+10
+     *   0     1    0      x4+6
+     *   0     0    1      x4+7
+     *   0     0    0      x4+7
+     */
+
+    /**
+     * Eliminate ones in column 2 and 5.
+     * R(0)-=R(2)
+     * R(1)-=R(2)
+     * R(3)-=R(2)
+     */
+
+    /*float minor[4][2] = {{x0-x2,y0-y2},
+                         {x1-x2,y1-y2},
+                         {x2   ,y2   },
+                         {x3-x2,y3-y2}};*/
+    /*float major[8][3] = {{x2X2-x0X0,y2X2-y0X0,(X0-X2)},
+                         {x2X2-x1X1,y2X2-y1X1,(X1-X2)},
+                         {-x2X2    ,-y2X2    ,(X2   )},
+                         {x2X2-x3X3,y2X2-y3X3,(X3-X2)},
+                         {x2Y2-x0Y0,y2Y2-y0Y0,(Y0-Y2)},
+                         {x2Y2-x1Y1,y2Y2-y1Y1,(Y1-Y2)},
+                         {-x2Y2    ,-y2Y2    ,(Y2   )},
+                         {x2Y2-x3Y3,y2Y2-y3Y3,(Y3-Y2)}};*/
+    float minor[2][4] = {{x0-x2,x1-x2,x2   ,x3-x2},
+                         {y0-y2,y1-y2,y2   ,y3-y2}};
+    float major[3][8] = {{x2X2-x0X0,x2X2-x1X1,-x2X2    ,x2X2-x3X3,x2Y2-x0Y0,x2Y2-x1Y1,-x2Y2    ,x2Y2-x3Y3},
+                         {y2X2-y0X0,y2X2-y1X1,-y2X2    ,y2X2-y3X3,y2Y2-y0Y0,y2Y2-y1Y1,-y2Y2    ,y2Y2-y3Y3},
+                         { (X0-X2) , (X1-X2) , (X2   ) , (X3-X2) , (Y0-Y2) , (Y1-Y2) , (Y2   ) , (Y3-Y2) }};
+
+    /**
+     * int i;
+     * for(i=0;i<8;i++) major[2][i]=-major[2][i];
+     * Eliminate column 0 of rows 1 and 3
+     * R(1)=(x0-x2)*R(1)-(x1-x2)*R(0),     y1'=(y1-y2)(x0-x2)-(x1-x2)(y0-y2)
+     * R(3)=(x0-x2)*R(3)-(x3-x2)*R(0),     y3'=(y3-y2)(x0-x2)-(x3-x2)(y0-y2)
+     */
+
+    float scalar1=minor[0][0], scalar2=minor[0][1];
+    minor[1][1]=minor[1][1]*scalar1-minor[1][0]*scalar2;
+
+    major[0][1]=major[0][1]*scalar1-major[0][0]*scalar2;
+    major[1][1]=major[1][1]*scalar1-major[1][0]*scalar2;
+    major[2][1]=major[2][1]*scalar1-major[2][0]*scalar2;
+
+    major[0][5]=major[0][5]*scalar1-major[0][4]*scalar2;
+    major[1][5]=major[1][5]*scalar1-major[1][4]*scalar2;
+    major[2][5]=major[2][5]*scalar1-major[2][4]*scalar2;
+
+    scalar2=minor[0][3];
+    minor[1][3]=minor[1][3]*scalar1-minor[1][0]*scalar2;
+
+    major[0][3]=major[0][3]*scalar1-major[0][0]*scalar2;
+    major[1][3]=major[1][3]*scalar1-major[1][0]*scalar2;
+    major[2][3]=major[2][3]*scalar1-major[2][0]*scalar2;
+
+    major[0][7]=major[0][7]*scalar1-major[0][4]*scalar2;
+    major[1][7]=major[1][7]*scalar1-major[1][4]*scalar2;
+    major[2][7]=major[2][7]*scalar1-major[2][4]*scalar2;
+
+    /**
+     * Eliminate column 1 of rows 0 and 3
+     * R(3)=y1'*R(3)-y3'*R(1)
+     * R(0)=y1'*R(0)-(y0-y2)*R(1)
+     */
+
+    scalar1=minor[1][1];scalar2=minor[1][3];
+    major[0][3]=major[0][3]*scalar1-major[0][1]*scalar2;
+    major[1][3]=major[1][3]*scalar1-major[1][1]*scalar2;
+    major[2][3]=major[2][3]*scalar1-major[2][1]*scalar2;
+
+    major[0][7]=major[0][7]*scalar1-major[0][5]*scalar2;
+    major[1][7]=major[1][7]*scalar1-major[1][5]*scalar2;
+    major[2][7]=major[2][7]*scalar1-major[2][5]*scalar2;
+
+    scalar2=minor[1][0];
+    minor[0][0]=minor[0][0]*scalar1-minor[0][1]*scalar2;
+
+    major[0][0]=major[0][0]*scalar1-major[0][1]*scalar2;
+    major[1][0]=major[1][0]*scalar1-major[1][1]*scalar2;
+    major[2][0]=major[2][0]*scalar1-major[2][1]*scalar2;
+
+    major[0][4]=major[0][4]*scalar1-major[0][5]*scalar2;
+    major[1][4]=major[1][4]*scalar1-major[1][5]*scalar2;
+    major[2][4]=major[2][4]*scalar1-major[2][5]*scalar2;
+
+    /**
+     * Eliminate columns 0 and 1 of row 2
+     * R(0)/=x0'
+     * R(1)/=y1'
+     * R(2)-= (x2*R(0) + y2*R(1))
+     */
+    /* NOTE(review): no division from here on is guarded against a zero pivot. */
+    scalar1=1.0f/minor[0][0];
+    major[0][0]*=scalar1;
+    major[1][0]*=scalar1;
+    major[2][0]*=scalar1;
+    major[0][4]*=scalar1;
+    major[1][4]*=scalar1;
+    major[2][4]*=scalar1;
+
+    scalar1=1.0f/minor[1][1];
+    major[0][1]*=scalar1;
+    major[1][1]*=scalar1;
+    major[2][1]*=scalar1;
+    major[0][5]*=scalar1;
+    major[1][5]*=scalar1;
+    major[2][5]*=scalar1;
+
+
+    scalar1=minor[0][2];scalar2=minor[1][2];
+    major[0][2]-=major[0][0]*scalar1+major[0][1]*scalar2;
+    major[1][2]-=major[1][0]*scalar1+major[1][1]*scalar2;
+    major[2][2]-=major[2][0]*scalar1+major[2][1]*scalar2;
+
+    major[0][6]-=major[0][4]*scalar1+major[0][5]*scalar2;
+    major[1][6]-=major[1][4]*scalar1+major[1][5]*scalar2;
+    major[2][6]-=major[2][4]*scalar1+major[2][5]*scalar2;
+
+    /* Only major matters now. R(3) and R(7) correspond to the hollowed-out rows. */
+    scalar1=major[0][7];
+    major[1][7]/=scalar1;
+    major[2][7]/=scalar1;
+
+    scalar1=major[0][0];major[1][0]-=scalar1*major[1][7];major[2][0]-=scalar1*major[2][7];
+    scalar1=major[0][1];major[1][1]-=scalar1*major[1][7];major[2][1]-=scalar1*major[2][7];
+    scalar1=major[0][2];major[1][2]-=scalar1*major[1][7];major[2][2]-=scalar1*major[2][7];
+    scalar1=major[0][3];major[1][3]-=scalar1*major[1][7];major[2][3]-=scalar1*major[2][7];
+    scalar1=major[0][4];major[1][4]-=scalar1*major[1][7];major[2][4]-=scalar1*major[2][7];
+    scalar1=major[0][5];major[1][5]-=scalar1*major[1][7];major[2][5]-=scalar1*major[2][7];
+    scalar1=major[0][6];major[1][6]-=scalar1*major[1][7];major[2][6]-=scalar1*major[2][7];
+
+
+    /* One column left (Two in fact, but the last one is the homography) */
+    scalar1=major[1][3];
+
+    major[2][3]/=scalar1;
+    scalar1=major[1][0];major[2][0]-=scalar1*major[2][3];
+    scalar1=major[1][1];major[2][1]-=scalar1*major[2][3];
+    scalar1=major[1][2];major[2][2]-=scalar1*major[2][3];
+    scalar1=major[1][4];major[2][4]-=scalar1*major[2][3];
+    scalar1=major[1][5];major[2][5]-=scalar1*major[2][3];
+    scalar1=major[1][6];major[2][6]-=scalar1*major[2][3];
+    scalar1=major[1][7];major[2][7]-=scalar1*major[2][3];
+
+
+    /* Homography is done. */
+    H[0]=major[2][0];
+    H[1]=major[2][1];
+    H[2]=major[2][2];
+
+    H[3]=major[2][4];
+    H[4]=major[2][5];
+    H[5]=major[2][6];
+
+    H[6]=major[2][7];/* Bottom row is read from columns 7 and 3, the */
+    H[7]=major[2][3];/* "hollowed-out" R(7) and R(3) noted above.    */
+    H[8]=1.0;        /* The last entry of H is fixed at 1.           */
+}
+
+
+/**
+ * Returns whether refinement is possible.
+ *
+ * NB This is separate from whether it is *enabled*.
+ *
+ * @return 0 if refinement isn't possible, non-zero otherwise.
+ *
+ * Reads    (direct): best.numInl
+ * Reads   (callees): None.
+ * Writes   (direct): None.
+ * Writes  (callees): None.
+ */
+
+inline int    RHO_HEST_REFC::canRefine(void){
+    /**
+     * With only SMPL_SIZE inliers the GE solution is already optimal, so
+     * refinement needs strictly more inliers than the sample size.
+     */
+
+    return (unsigned)SMPL_SIZE < best.numInl;
+}
+
+
+/**
+ * Refines the best-so-far homography (best.H).
+ *
+ * Reads    (direct): best.H, arg.src, arg.dst, best.inl, arg.N, lm.JtJ,
+ *                    lm.Jte, lm.tmp1
+ * Reads   (callees): None.
+ * Writes   (direct): best.H, lm.JtJ, lm.Jte, lm.tmp1
+ * Writes  (callees): None.
+ */
+
+inline void   RHO_HEST_REFC::refine(void){
+    int         i;
+    float       S, newS;  /* Sum of squared errors */
+    float       gain;     /* Gain-parameter. */
+    float       L  = 100.0f;/* Lambda of LevMarq */
+    float dH[8], newH[8]; /* Step and candidate homography (8 parameters each). */
+
+    /**
+     * Iteratively refine the homography.
+     */
+    /* Find initial conditions */
+    sacCalcJacobianErrors(best.H, arg.src, arg.dst, best.inl, arg.N,
+                          lm.JtJ, lm.Jte,  &S);
+
+    /*Levenberg-Marquardt Loop.*/
+    for(i=0;i<MAXLEVMARQITERS;i++){
+        /**
+         * Attempt a step given current state
+         *   - Jacobian-x-Jacobian   (JtJ)
+         *   - Jacobian-x-error      (Jte)
+         *   - Sum of squared errors (S)
+         * and current parameter
+         *   - Lambda (L)
+         * .
+         *
+         * This is done by solving the system of equations
+         *     Ax = b
+         * where A (JtJ) and b (Jte) are sourced from our current state, and
+         * the solution x becomes a step (dH) that is applied to best.H in
+         * order to compute a candidate homography (newH).
+         *
+         * The system above is solved by Cholesky decomposition of a
+         * sufficiently-damped JtJ into a lower-triangular matrix (and its
+         * transpose), whose inverse is then computed. This inverse (and its
+         * transpose) then multiply Jte in order to find dH.
+         */
+
+        while(!sacChol8x8Damped(lm.JtJ, L, lm.tmp1)){
+            L *= 2.0f;/* Decomposition failed: retry with heavier damping. */
+        }
+        sacTRInv8x8   (lm.tmp1, lm.tmp1);
+        sacTRISolve8x8(lm.tmp1, lm.Jte,  dH);
+        sacSub8x1     (newH,       best.H,  dH);
+        sacCalcJacobianErrors(newH, arg.src, arg.dst, best.inl, arg.N,
+                              NULL, NULL, &newS);
+        gain = sacLMGain(dH, lm.Jte, S, newS, L);
+        /*printf("Lambda: %12.6f  S: %12.6f  newS: %12.6f  Gain: %12.6f\n",
+                 L, S, newS, gain);*/
+
+        /**
+         * If the gain is positive (i.e., the new Sum of Square Errors (newS)
+         * corresponding to newH is lower than the previous one (S) ), save
+         * the current state and accept the new step dH.
+         *
+         * If the gain is below LM_GAIN_LO, damp more (increase L), even if the
+         * gain was positive. If the gain is above LM_GAIN_HI, damp less
+         * (decrease L). Otherwise L is left unchanged.
+         */
+
+        if(gain < LM_GAIN_LO){
+            L *= 8;
+            if(L>1000.0f/FLT_EPSILON){
+                break;/* FIXME: Most naive termination criterion imaginable. */
+            }
+        }else if(gain > LM_GAIN_HI){
+            L *= 0.5;
+        }
+        /* NB: the break above exits before this acceptance test is reached. */
+        if(gain > 0){
+            S = newS;/* Overwritten again through &S by the call below. */
+            memcpy(best.H, newH, sizeof(newH));/* Copies the 8 floats of newH. */
+            sacCalcJacobianErrors(best.H, arg.src, arg.dst, best.inl, arg.N,
+                                  lm.JtJ, lm.Jte,  &S);
+        }
+    }
+}
+
+
+/**
+ * Compute directly the JtJ, Jte and sum-of-squared-error for a given
+ * homography and set of inliers.
+ *
+ * This is possible because the product of J and its transpose as well as with
+ * the error and the sum-of-squared-error can all be computed additively
+ * (match-by-match), as one would intuitively expect; All matches make
+ * contributions to the error independently of each other.
+ *
+ * What this allows is a constant-space implementation of Lev-Marq that can
+ * nevertheless be vectorized if need be.
+ */
+
+static inline void   sacCalcJacobianErrors(const float* H,   /* Homography; only H[0..7] are read. */
+                                           const float* src, /* Interleaved x,y: src[2*i], src[2*i+1]. */
+                                           const float* dst, /* Interleaved X,Y: dst[2*i], dst[2*i+1]. */
+                                           const char*  inl, /* Per-match mask; 0 means skip the match. */
+                                           unsigned     N,   /* Number of matches. */
+                                           float     (* JtJ)[8],/* Out, may be NULL. */
+                                           float*       Jte, /* Out (8 floats), may be NULL. */
+                                           float*       Sp){ /* Out: sum of squared errors, may be NULL. */
+    unsigned i;
+    float    S;
+
+    /* Zero out JtJ, Jte and S */
+    if(JtJ){memset(JtJ, 0, 8*8*sizeof(float));}
+    if(Jte){memset(Jte, 0, 8*1*sizeof(float));}
+    S = 0.0f;
+
+    /* Additively compute JtJ and Jte */
+    for(i=0;i<N;i++){
+        /* Skip outliers */
+        if(!inl[i]){
+            continue;
+        }
+
+        /**
+         * Otherwise, compute additively the lower-triangular half of JtJ and
+         * the Jte vector within the following formula:
+         *
+         *     LaTeX:
+         *     (J^{T}J + \lambda \diag( J^{T}J )) \beta = J^{T}[ y - f(\Beta) ]
+         *     Simplified ASCII:
+         *     (JtJ + L*diag(JtJ)) beta = Jt e, where e (error) is y-f(Beta).
+         *
+         * For this we need to calculate
+         *     1) The 2D error (e) of the homography on the current point i
+         *        using the current parameters Beta.
+         *     2) The derivatives (J) of the error on the current point i under
+         *        perturbations of the current parameters Beta.
+         * Accumulate products of the error times the derivative to Jte, and
+         * products of the derivatives to JtJ.
+         */
+
+        /* Compute Squared Error */
+        float x       = src[2*i+0];
+        float y       = src[2*i+1];
+        float X       = dst[2*i+0];
+        float Y       = dst[2*i+1];
+        float W       = (H[6]*x + H[7]*y + 1.0f);
+        float iW      = fabs(W) > FLT_EPSILON ? 1.0f/W : 0;/* W ~ 0: use 0 instead of dividing. */
+
+        float reprojX = (H[0]*x + H[1]*y + H[2]) * iW;
+        float reprojY = (H[3]*x + H[4]*y + H[5]) * iW;
+
+        float eX      = reprojX - X;
+        float eY      = reprojY - Y;
+        float e       = eX*eX + eY*eY;
+        S            += e;
+
+        /* Compute Jacobian */
+        if(JtJ || Jte){
+            float dxh11 = x          * iW;
+            float dxh12 = y          * iW;
+            float dxh13 =              iW;
+          /*float dxh21 = 0.0f;*/
+          /*float dxh22 = 0.0f;*/
+          /*float dxh23 = 0.0f;*/
+            float dxh31 = -reprojX*x * iW;
+            float dxh32 = -reprojX*y * iW;
+
+          /*float dyh11 = 0.0f;*/
+          /*float dyh12 = 0.0f;*/
+          /*float dyh13 = 0.0f;*/
+            float dyh21 = x          * iW;
+            float dyh22 = y          * iW;
+            float dyh23 =              iW;
+            float dyh31 = -reprojY*x * iW;
+            float dyh32 = -reprojY*y * iW;
+
+            /* Update Jte:          X             Y   */
+            if(Jte){
+                Jte[0]    += eX   *dxh11              ;/*  +0 */
+                Jte[1]    += eX   *dxh12              ;/*  +0 */
+                Jte[2]    += eX   *dxh13              ;/*  +0 */
+                Jte[3]    +=               eY   *dyh21;/* 0+  */
+                Jte[4]    +=               eY   *dyh22;/* 0+  */
+                Jte[5]    +=               eY   *dyh23;/* 0+  */
+                Jte[6]    += eX   *dxh31 + eY   *dyh31;/*  +  */
+                Jte[7]    += eX   *dxh32 + eY   *dyh32;/*  +  */
+            }
+
+            /* Update JtJ:          X             Y    */
+            if(JtJ){
+                JtJ[0][0] += dxh11*dxh11              ;/*  +0 */
+
+                JtJ[1][0] += dxh11*dxh12              ;/*  +0 */
+                JtJ[1][1] += dxh12*dxh12              ;/*  +0 */
+
+                JtJ[2][0] += dxh11*dxh13              ;/*  +0 */
+                JtJ[2][1] += dxh12*dxh13              ;/*  +0 */
+                JtJ[2][2] += dxh13*dxh13              ;/*  +0 */
+
+              /*JtJ[3][0] +=                          ;   0+0 */
+              /*JtJ[3][1] +=                          ;   0+0 */
+              /*JtJ[3][2] +=                          ;   0+0 */
+                JtJ[3][3] +=               dyh21*dyh21;/* 0+  */
+
+              /*JtJ[4][0] +=                          ;   0+0 */
+              /*JtJ[4][1] +=                          ;   0+0 */
+              /*JtJ[4][2] +=                          ;   0+0 */
+                JtJ[4][3] +=               dyh21*dyh22;/* 0+  */
+                JtJ[4][4] +=               dyh22*dyh22;/* 0+  */
+
+              /*JtJ[5][0] +=                          ;   0+0 */
+              /*JtJ[5][1] +=                          ;   0+0 */
+              /*JtJ[5][2] +=                          ;   0+0 */
+                JtJ[5][3] +=               dyh21*dyh23;/* 0+  */
+                JtJ[5][4] +=               dyh22*dyh23;/* 0+  */
+                JtJ[5][5] +=               dyh23*dyh23;/* 0+  */
+
+                JtJ[6][0] += dxh11*dxh31              ;/*  +0 */
+                JtJ[6][1] += dxh12*dxh31              ;/*  +0 */
+                JtJ[6][2] += dxh13*dxh31              ;/*  +0 */
+                JtJ[6][3] +=               dyh21*dyh31;/* 0+  */
+                JtJ[6][4] +=               dyh22*dyh31;/* 0+  */
+                JtJ[6][5] +=               dyh23*dyh31;/* 0+  */
+                JtJ[6][6] += dxh31*dxh31 + dyh31*dyh31;/*  +  */
+
+                JtJ[7][0] += dxh11*dxh32              ;/*  +0 */
+                JtJ[7][1] += dxh12*dxh32              ;/*  +0 */
+                JtJ[7][2] += dxh13*dxh32              ;/*  +0 */
+                JtJ[7][3] +=               dyh21*dyh32;/* 0+  */
+                JtJ[7][4] +=               dyh22*dyh32;/* 0+  */
+                JtJ[7][5] +=               dyh23*dyh32;/* 0+  */
+                JtJ[7][6] += dxh31*dxh32 + dyh31*dyh32;/*  +  */
+                JtJ[7][7] += dxh32*dxh32 + dyh32*dyh32;/*  +  */
+            }
+        }
+    }
+
+    if(Sp){*Sp = S;}
+}
+
+
+/**
+ * Compute the Levenberg-Marquardt "gain" obtained by the given step dH.
+ *
+ * Drawn from http://www2.imm.dtu.dk/documents/ftp/tr99/tr05_99.pdf.
+ */
+
+static inline float  sacLMGain(const float*  dH,
+                               const float*  Jte,
+                               const float   S,
+                               const float   newS,
+                               const float   lambda){
+    const float actual    = S-newS;/* Achieved drop in squared error. */
+    float       predicted = 0;     /* Denominator of the gain ratio.  */
+    int         k;
+
+    /* Start from lambda * (dH . dH) ... */
+    for(k=0;k!=8;++k){
+        predicted += dH[k]*dH[k];
+    }
+    predicted *= lambda;
+
+    /* ... add dH . Jte (added, not subtracted, because the dH that is */
+    /* computed has the opposite sign) ...                              */
+    for(k=0;k!=8;++k){
+        predicted += dH[k]*Jte[k];
+    }
+
+    /* ... and halve. A near-zero denominator returns the raw difference. */
+    predicted *= 0.5;
+    if(fabs(predicted) < FLT_EPSILON){return actual;}
+    return actual / predicted;
+}
+
+
+/**
+ * Cholesky decomposition on 8x8 real positive-definite matrix defined by its
+ * lower-triangular half. Outputs L, the lower triangular part of the
+ * decomposition.
+ *
+ * A and L can overlap fully (in-place) or not at all, but may not partially
+ * overlap.
+ *
+ * For damping, the diagonal elements are scaled by 1.0 + lambda.
+ *
+ * Returns zero if decomposition unsuccessful, and non-zero otherwise.
+ *
+ * Source: http://en.wikipedia.org/wiki/Cholesky_decomposition#
+ * The_Cholesky.E2.80.93Banachiewicz_and_Cholesky.E2.80.93Crout_algorithms
+ */
+
+static inline int    sacChol8x8Damped(const float (*A)[8],
+                                      float         lambda,
+                                      float       (*L)[8]){
+    const int N = 8;/* Plain const: `register` is ill-formed in C++17. */
+    int i, j, k;
+    float  lambdap1 = lambda + 1.0f;/* Scale factor applied to diag(A). */
+    float  x;
+
+    for(i=0;i<N;i++){/* Row */
+        /* Pre-diagonal elements */
+        for(j=0;j<i;j++){
+            x = A[i][j];               /* Aij */
+            for(k=0;k<j;k++){
+                x -= L[i][k] * L[j][k];/* - Sum_{k=0..j-1} Lik*Ljk */
+            }
+            L[i][j] = x / L[j][j];     /* Lij = ... / Ljj */
+        }
+
+        /* Diagonal element */
+        {j = i;
+            x = A[j][j] * lambdap1;    /* Ajj */
+            for(k=0;k<j;k++){
+                x -= L[j][k] * L[j][k];/* - Sum_{k=0..j-1} Ljk^2 */
+            }
+            if(x<0){/* NOTE(review): x == 0 passes and yields a zero pivot. */
+                return 0;              /* Decomposition failed. */
+            }
+            L[j][j] = sqrtf(x);        /* Ljj = sqrt( ... ) */
+        }
+    }
+
+    return 1;
+}
+
+
+/**
+ * Invert lower-triangular 8x8 matrix L into lower-triangular matrix M.
+ *
+ * L and M can overlap fully (in-place) or not at all, but may not partially
+ * overlap.
+ *
+ * Uses formulation from
+ * http://www.cs.berkeley.edu/~knight/knight_math221_poster.pdf
+ * , adjusted for the fact that A^T^-1 = A^-1^T. Thus:
+ *
+ * U11    U12                   U11^-1   -U11^-1*U12*U22^-1
+ *                ->
+ *  0     U22                     0            U22^-1
+ *
+ * Becomes
+ *
+ * L11     0                    L11^-1           0
+ *                ->
+ * L21    L22            -L22^-1*L21*L11^-1    L22^-1
+ *
+ * Since
+ *
+ * ( -L11^T^-1*L21^T*L22^T^-1 )^T = -L22^T^-1^T*L21^T^T*L11^T^-1^T
+ *                                = -L22^T^T^-1*L21^T^T*L11^T^T^-1
+ *                                = -L22^-1*L21*L11^-1
+ */
+
+static inline void   sacTRInv8x8(const float (*L)[8],
+                                 float       (*M)[8]){
+    float s[2][2], t[2][2];
+    float u[4][4], v[4][4];
+
+    /*
+        L00  0   0   0   0   0   0   0
+        L10 L11  0   0   0   0   0   0
+        L20 L21 L22  0   0   0   0   0
+        L30 L31 L32 L33  0   0   0   0
+        L40 L41 L42 L43 L44  0   0   0
+        L50 L51 L52 L53 L54 L55  0   0
+        L60 L61 L62 L63 L64 L65 L66  0
+        L70 L71 L72 L73 L74 L75 L76 L77
+    */
+
+    /* Invert 4*2 1x1 matrices; Starts recursion. */
+    M[0][0] = 1.0f/L[0][0];
+    M[1][1] = 1.0f/L[1][1];
+    M[2][2] = 1.0f/L[2][2];
+    M[3][3] = 1.0f/L[3][3];
+    M[4][4] = 1.0f/L[4][4];
+    M[5][5] = 1.0f/L[5][5];
+    M[6][6] = 1.0f/L[6][6];
+    M[7][7] = 1.0f/L[7][7];
+
+    /*
+        M00  0   0   0   0   0   0   0
+        L10 M11  0   0   0   0   0   0
+        L20 L21 M22  0   0   0   0   0
+        L30 L31 L32 M33  0   0   0   0
+        L40 L41 L42 L43 M44  0   0   0
+        L50 L51 L52 L53 L54 M55  0   0
+        L60 L61 L62 L63 L64 L65 M66  0
+        L70 L71 L72 L73 L74 L75 L76 M77
+    */
+
+    /* 4*2 Matrix products of 1x1 matrices */
+    M[1][0] = -M[1][1]*L[1][0]*M[0][0];
+    M[3][2] = -M[3][3]*L[3][2]*M[2][2];
+    M[5][4] = -M[5][5]*L[5][4]*M[4][4];
+    M[7][6] = -M[7][7]*L[7][6]*M[6][6];
+
+    /*
+        M00  0   0   0   0   0   0   0
+        M10 M11  0   0   0   0   0   0
+        L20 L21 M22  0   0   0   0   0
+        L30 L31 M32 M33  0   0   0   0
+        L40 L41 L42 L43 M44  0   0   0
+        L50 L51 L52 L53 M54 M55  0   0
+        L60 L61 L62 L63 L64 L65 M66  0
+        L70 L71 L72 L73 L74 L75 M76 M77
+    */
+
+    /* 2*2 Matrix products of 2x2 matrices */
+
+    /*
+       (M22  0 )   (L20 L21)   (M00  0 )
+     - (M32 M33) x (L30 L31) x (M10 M11)
+    */
+
+    s[0][0] = M[2][2]*L[2][0];
+    s[0][1] = M[2][2]*L[2][1];
+    s[1][0] = M[3][2]*L[2][0]+M[3][3]*L[3][0];
+    s[1][1] = M[3][2]*L[2][1]+M[3][3]*L[3][1];
+
+    t[0][0] = s[0][0]*M[0][0]+s[0][1]*M[1][0];
+    t[0][1] =                 s[0][1]*M[1][1];
+    t[1][0] = s[1][0]*M[0][0]+s[1][1]*M[1][0];
+    t[1][1] =                 s[1][1]*M[1][1];
+
+    M[2][0] = -t[0][0];
+    M[2][1] = -t[0][1];
+    M[3][0] = -t[1][0];
+    M[3][1] = -t[1][1];
+
+    /*
+       (M66  0 )   (L64 L65)   (M44  0 )
+     - (L76 M77) x (L74 L75) x (M54 M55)
+    */
+
+    s[0][0] = M[6][6]*L[6][4];
+    s[0][1] = M[6][6]*L[6][5];
+    s[1][0] = M[7][6]*L[6][4]+M[7][7]*L[7][4];
+    s[1][1] = M[7][6]*L[6][5]+M[7][7]*L[7][5];
+
+    t[0][0] = s[0][0]*M[4][4]+s[0][1]*M[5][4];
+    t[0][1] =                 s[0][1]*M[5][5];
+    t[1][0] = s[1][0]*M[4][4]+s[1][1]*M[5][4];
+    t[1][1] =                 s[1][1]*M[5][5];
+
+    M[6][4] = -t[0][0];
+    M[6][5] = -t[0][1];
+    M[7][4] = -t[1][0];
+    M[7][5] = -t[1][1];
+
+    /*
+        M00  0   0   0   0   0   0   0
+        M10 M11  0   0   0   0   0   0
+        M20 M21 M22  0   0   0   0   0
+        M30 M31 M32 M33  0   0   0   0
+        L40 L41 L42 L43 M44  0   0   0
+        L50 L51 L52 L53 M54 M55  0   0
+        L60 L61 L62 L63 M64 M65 M66  0
+        L70 L71 L72 L73 M74 M75 M76 M77
+    */
+
+    /* 1*2 Matrix products of 4x4 matrices */
+
+    /*
+       (M44  0   0   0 )   (L40 L41 L42 L43)   (M00  0   0   0 )
+       (M54 M55  0   0 )   (L50 L51 L52 L53)   (M10 M11  0   0 )
+       (M64 M65 M66  0 )   (L60 L61 L62 L63)   (M20 M21 M22  0 )
+     - (M74 M75 M76 M77) x (L70 L71 L72 L73) x (M30 M31 M32 M33)
+    */
+
+    u[0][0] = M[4][4]*L[4][0];
+    u[0][1] = M[4][4]*L[4][1];
+    u[0][2] = M[4][4]*L[4][2];
+    u[0][3] = M[4][4]*L[4][3];
+    u[1][0] = M[5][4]*L[4][0]+M[5][5]*L[5][0];
+    u[1][1] = M[5][4]*L[4][1]+M[5][5]*L[5][1];
+    u[1][2] = M[5][4]*L[4][2]+M[5][5]*L[5][2];
+    u[1][3] = M[5][4]*L[4][3]+M[5][5]*L[5][3];
+    u[2][0] = M[6][4]*L[4][0]+M[6][5]*L[5][0]+M[6][6]*L[6][0];
+    u[2][1] = M[6][4]*L[4][1]+M[6][5]*L[5][1]+M[6][6]*L[6][1];
+    u[2][2] = M[6][4]*L[4][2]+M[6][5]*L[5][2]+M[6][6]*L[6][2];
+    u[2][3] = M[6][4]*L[4][3]+M[6][5]*L[5][3]+M[6][6]*L[6][3];
+    u[3][0] = M[7][4]*L[4][0]+M[7][5]*L[5][0]+M[7][6]*L[6][0]+M[7][7]*L[7][0];
+    u[3][1] = M[7][4]*L[4][1]+M[7][5]*L[5][1]+M[7][6]*L[6][1]+M[7][7]*L[7][1];
+    u[3][2] = M[7][4]*L[4][2]+M[7][5]*L[5][2]+M[7][6]*L[6][2]+M[7][7]*L[7][2];
+    u[3][3] = M[7][4]*L[4][3]+M[7][5]*L[5][3]+M[7][6]*L[6][3]+M[7][7]*L[7][3];
+
+    v[0][0] = u[0][0]*M[0][0]+u[0][1]*M[1][0]+u[0][2]*M[2][0]+u[0][3]*M[3][0];
+    v[0][1] =                 u[0][1]*M[1][1]+u[0][2]*M[2][1]+u[0][3]*M[3][1];
+    v[0][2] =                                 u[0][2]*M[2][2]+u[0][3]*M[3][2];
+    v[0][3] =                                                 u[0][3]*M[3][3];
+    v[1][0] = u[1][0]*M[0][0]+u[1][1]*M[1][0]+u[1][2]*M[2][0]+u[1][3]*M[3][0];
+    v[1][1] =                 u[1][1]*M[1][1]+u[1][2]*M[2][1]+u[1][3]*M[3][1];
+    v[1][2] =                                 u[1][2]*M[2][2]+u[1][3]*M[3][2];
+    v[1][3] =                                                 u[1][3]*M[3][3];
+    v[2][0] = u[2][0]*M[0][0]+u[2][1]*M[1][0]+u[2][2]*M[2][0]+u[2][3]*M[3][0];
+    v[2][1] =                 u[2][1]*M[1][1]+u[2][2]*M[2][1]+u[2][3]*M[3][1];
+    v[2][2] =                                 u[2][2]*M[2][2]+u[2][3]*M[3][2];
+    v[2][3] =                                                 u[2][3]*M[3][3];
+    v[3][0] = u[3][0]*M[0][0]+u[3][1]*M[1][0]+u[3][2]*M[2][0]+u[3][3]*M[3][0];
+    v[3][1] =                 u[3][1]*M[1][1]+u[3][2]*M[2][1]+u[3][3]*M[3][1];
+    v[3][2] =                                 u[3][2]*M[2][2]+u[3][3]*M[3][2];
+    v[3][3] =                                                 u[3][3]*M[3][3];
+
+    M[4][0] = -v[0][0];
+    M[4][1] = -v[0][1];
+    M[4][2] = -v[0][2];
+    M[4][3] = -v[0][3];
+    M[5][0] = -v[1][0];
+    M[5][1] = -v[1][1];
+    M[5][2] = -v[1][2];
+    M[5][3] = -v[1][3];
+    M[6][0] = -v[2][0];
+    M[6][1] = -v[2][1];
+    M[6][2] = -v[2][2];
+    M[6][3] = -v[2][3];
+    M[7][0] = -v[3][0];
+    M[7][1] = -v[3][1];
+    M[7][2] = -v[3][2];
+    M[7][3] = -v[3][3];
+
+    /*
+        M00  0   0   0   0   0   0   0
+        M10 M11  0   0   0   0   0   0
+        M20 M21 M22  0   0   0   0   0
+        M30 M31 M32 M33  0   0   0   0
+        M40 M41 M42 M43 M44  0   0   0
+        M50 M51 M52 M53 M54 M55  0   0
+        M60 M61 M62 M63 M64 M65 M66  0
+        M70 M71 M72 M73 M74 M75 M76 M77
+    */
+}
+
+
+/**
+ * Solves dH = inv(JtJ) Jte. The argument lower-triangular matrix is the
+ * inverse of L as produced by the Cholesky decomposition LL^T of the matrix
+ * JtJ; Thus the operation performed here is a left-multiplication of a vector
+ * by two triangular matrices. The math is below:
+ *
+ * JtJ      = LL^T
+ * Linv     = L^-1
+ * (JtJ)^-1 = (LL^T)^-1
+ *          = (L^T^-1)(Linv)
+ *          = (Linv^T)(Linv)
+ * dH       = ((JtJ)^-1) (Jte)
+ *          = (Linv^T)(Linv) (Jte)
+ *
+ * where J is nx8, Jt is 8xn, JtJ is 8x8 PD, e is nx1, Jte is 8x1, L is lower
+ * triangular 8x8 and dH is 8x1.
+ */
+
+static inline void   sacTRISolve8x8(const float (*L)[8],   /* Lower-triangular Linv (inverse of the Cholesky factor of JtJ); only entries L[i][j] with j<=i are read. */
+                                    const float*  Jte,      /* Right-hand side; elements 0..7 are read. */
+                                    float*        dH){      /* Result (Linv^T)(Linv)(Jte); elements 0..7 are written. */
+    float t[8];
+    /* Pass 1: t = Linv * Jte. Row i of a lower-triangular matrix only has entries in columns 0..i. */
+    t[0]  = L[0][0]*Jte[0];
+    t[1]  = L[1][0]*Jte[0]+L[1][1]*Jte[1];
+    t[2]  = L[2][0]*Jte[0]+L[2][1]*Jte[1]+L[2][2]*Jte[2];
+    t[3]  = L[3][0]*Jte[0]+L[3][1]*Jte[1]+L[3][2]*Jte[2]+L[3][3]*Jte[3];
+    t[4]  = L[4][0]*Jte[0]+L[4][1]*Jte[1]+L[4][2]*Jte[2]+L[4][3]*Jte[3]+L[4][4]*Jte[4];
+    t[5]  = L[5][0]*Jte[0]+L[5][1]*Jte[1]+L[5][2]*Jte[2]+L[5][3]*Jte[3]+L[5][4]*Jte[4]+L[5][5]*Jte[5];
+    t[6]  = L[6][0]*Jte[0]+L[6][1]*Jte[1]+L[6][2]*Jte[2]+L[6][3]*Jte[3]+L[6][4]*Jte[4]+L[6][5]*Jte[5]+L[6][6]*Jte[6];
+    t[7]  = L[7][0]*Jte[0]+L[7][1]*Jte[1]+L[7][2]*Jte[2]+L[7][3]*Jte[3]+L[7][4]*Jte[4]+L[7][5]*Jte[5]+L[7][6]*Jte[6]+L[7][7]*Jte[7];
+    /* Pass 2: dH = Linv^T * t. Row i of the transpose is column i of L, which only has entries in rows i..7. */
+    /* All of t is computed from Jte before the first write to dH, so Jte is fully consumed by this point.    */
+    dH[0] = L[0][0]*t[0]+L[1][0]*t[1]+L[2][0]*t[2]+L[3][0]*t[3]+L[4][0]*t[4]+L[5][0]*t[5]+L[6][0]*t[6]+L[7][0]*t[7];
+    dH[1] =              L[1][1]*t[1]+L[2][1]*t[2]+L[3][1]*t[3]+L[4][1]*t[4]+L[5][1]*t[5]+L[6][1]*t[6]+L[7][1]*t[7];
+    dH[2] =                           L[2][2]*t[2]+L[3][2]*t[3]+L[4][2]*t[4]+L[5][2]*t[5]+L[6][2]*t[6]+L[7][2]*t[7];
+    dH[3] =                                        L[3][3]*t[3]+L[4][3]*t[4]+L[5][3]*t[5]+L[6][3]*t[6]+L[7][3]*t[7];
+    dH[4] =                                                     L[4][4]*t[4]+L[5][4]*t[5]+L[6][4]*t[6]+L[7][4]*t[7];
+    dH[5] =                                                                  L[5][5]*t[5]+L[6][5]*t[6]+L[7][5]*t[7];
+    dH[6] =                                                                               L[6][6]*t[6]+L[7][6]*t[7];
+    dH[7] =                                                                                            L[7][7]*t[7];
+}
+
+
+/**
+ * Subtract dH from H.
+ */
+
+static inline void   sacSub8x1(float* Hout, const float* H, const float* dH){
+    /* Element-wise difference of two 8-vectors: Hout[k] = H[k] - dH[k].    */
+    /* Element k is read from both inputs before Hout[k] is written, and    */
+    /* elements are processed in ascending order, exactly one at a time.    */
+    const float* minuend    = H;
+    const float* subtrahend = dH;
+    for(int k=0;k<8;k++){
+        Hout[k] = minuend[k] - subtrahend[k];
+    }
+}
+
+
+/* End namespace cv */
+}
diff --git a/modules/calib3d/src/rho.h b/modules/calib3d/src/rho.h
new file mode 100644
index 000000000..082a41603
--- /dev/null
+++ b/modules/calib3d/src/rho.h
@@ -0,0 +1,268 @@
+/*
+  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+
+  By downloading, copying, installing or using the software you agree to this license.
+  If you do not agree to this license, do not download, install,
+  copy or use the software.
+
+
+                          BSD 3-Clause License
+
+ Copyright (C) 2014, Olexa Bilaniuk, Hamid Bazargani & Robert Laganiere, all rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+   * Redistribution's of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+   * Redistribution's in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+
+   * The name of the copyright holders may not be used to endorse or promote products
+     derived from this software without specific prior written permission.
+
+ This software is provided by the copyright holders and contributors "as is" and
+ any express or implied warranties, including, but not limited to, the implied
+ warranties of merchantability and fitness for a particular purpose are disclaimed.
+ In no event shall the Intel Corporation or contributors be liable for any direct,
+ indirect, incidental, special, exemplary, or consequential damages
+ (including, but not limited to, procurement of substitute goods or services;
+ loss of use, data, or profits; or business interruption) however caused
+ and on any theory of liability, whether in contract, strict liability,
+ or tort (including negligence or otherwise) arising in any way out of
+ the use of this software, even if advised of the possibility of such damage.
+*/
+
+/**
+ * Bilaniuk, Olexa, Hamid Bazargani, and Robert Laganiere. "Fast Target
+ * Recognition on Mobile Devices: Revisiting Gaussian Elimination for the
+ * Estimation of Planar Homographies." In Computer Vision and Pattern
+ * Recognition Workshops (CVPRW), 2014 IEEE Conference on, pp. 119-125.
+ * IEEE, 2014.
+ */
+
+/* Include Guards */
+#ifndef __OPENCV_RHO_H__
+#define __OPENCV_RHO_H__
+
+
+
+/* Includes */
+#include <opencv2/core.hpp>
+#include <stdint.h>
+
+
+
+
+
+/* Defines */
+
+
+/* Flags: bit values for the `flags` argument of rhoHest(). Each is #ifndef-guarded, so an earlier definition of the same name takes precedence. */
+#ifndef RHO_FLAG_NONE
+#define RHO_FLAG_NONE                        (0U<<0)  /* Value 0: no bits set. */
+#endif
+#ifndef RHO_FLAG_ENABLE_NR
+#define RHO_FLAG_ENABLE_NR                   (1U<<0)  /* Bit 0: non-randomness criterion (see the flag notes on rhoHest()). */
+#endif
+#ifndef RHO_FLAG_ENABLE_REFINEMENT
+#define RHO_FLAG_ENABLE_REFINEMENT           (1U<<1)  /* Bit 1: refinement of each new best model (see the flag notes on rhoHest()). */
+#endif
+#ifndef RHO_FLAG_ENABLE_FINAL_REFINEMENT
+#define RHO_FLAG_ENABLE_FINAL_REFINEMENT     (1U<<2)  /* Bit 2: one final refinement before returning (see the flag notes on rhoHest()). */
+#endif
+
+
+
+/* Namespace cv */
+namespace cv{
+
+/* Data structures */
+
+/**
+ * Homography Estimation context.
+ */
+
+struct RHO_HEST;
+typedef struct RHO_HEST RHO_HEST;
+
+
+/* Functions */
+
+/**
+ * Initialize the estimator context, by allocating the aligned buffers
+ * internally needed.
+ *
+ * @return A pointer to the context if successful; NULL if an error occurred.
+ */
+
+Ptr<RHO_HEST> rhoInit(void);
+
+
+/**
+ * Ensure that the estimator context's internal table for non-randomness
+ * criterion is at least of the given size, and uses the given beta. The table
+ * should be larger than the maximum number of matches fed into the estimator.
+ *
+ * A value of N of 0 requests deallocation of the table.
+ *
+ * @param [in] p     The initialized estimator context
+ * @param [in] N     If 0, deallocate internal table. If > 0, ensure that the
+ *                   internal table is of at least this size, reallocating if
+ *                   necessary.
+ * @param [in] beta  The beta-factor to use within the table.
+ * @return 0 if unsuccessful; non-zero otherwise.
+ */
+
+int  rhoEnsureCapacity(Ptr<RHO_HEST> p, unsigned N, double beta);
+
+
+
+/**
+ * Seeds the internal PRNG with the given seed.
+ *
+ * Although it is not required to call this function, since context
+ * initialization seeds itself with entropy from rand(), this function allows
+ * reproducible results by using a specified seed.
+ *
+ * @param [in] p    The estimator context whose PRNG is to be seeded.
+ * @param [in] seed The 64-bit integer seed.
+ */
+
+void rhoSeed(Ptr<RHO_HEST> p, uint64_t seed);
+
+
+/**
+ * Estimates the homography using the given context, matches and parameters to
+ * PROSAC.
+ *
+ * The given context must have been initialized.
+ *
+ * The matches are provided as two arrays of N single-precision, floating-point
+ * (x,y) points. Points with corresponding offsets in the two arrays constitute
+ * a match. The homography estimation attempts to find the 3x3 matrix H which
+ * best maps the homogeneous-coordinate points in the source array to their
+ * corresponding homogeneous-coordinate points in the destination array.
+ *
+ *     Note: At least 4 matches must be provided (N >= 4).
+ *     Note: A point in either array takes up 2 floats. The first of two stores
+ *           the x-coordinate and the second of the two stores the y-coordinate.
+ *           Thus, the arrays resemble this in memory:
+ *
+ *           src =    [x0, y0, x1, y1, x2, y2, x3, y3, x4, y4, ...]
+ *           Matches:     |       |       |       |       |
+ *           dst =    [x0, y0, x1, y1, x2, y2, x3, y3, x4, y4, ...]
+ *     Note: The matches are expected to be provided sorted by quality, or at
+ *           least not be worse-than-random in ordering.
+ *
+ * A pointer to the base of an array of N bytes can be provided. It serves as
+ * an output mask to indicate whether the corresponding match is an inlier to
+ * the returned homography, if any. A zero indicates an outlier; A non-zero
+ * value indicates an inlier.
+ *
+ * The PROSAC estimator requires a few parameters of its own. These are:
+ *
+ *     - The maximum distance that a source point projected onto the destination
+ *           plane can be from its putative match and still be considered an
+ *           inlier. Must be non-negative.
+ *           A sane default is 3.0.
+ *     - The maximum number of PROSAC iterations. This corresponds to the
+ *           largest number of samples that will be drawn and tested.
+ *           A sane default is 2000.
+ *     - The RANSAC convergence parameter. This corresponds to the number of
+ *           iterations after which PROSAC will start sampling like RANSAC.
+ *           A sane default is 2000.
+ *     - The confidence threshold. This corresponds to the probability of
+ *           finding a correct solution. Must be bounded by [0, 1].
+ *           A sane default is 0.995.
+ *     - The minimum number of inliers acceptable. Only a solution with at
+ *           least this many inliers will be returned. The minimum is 4.
+ *           A sane default is 10% of N.
+ *     - The beta-parameter for the non-randomness termination criterion.
+ *           Ignored if non-randomness criterion disabled, otherwise must be
+ *           bounded by (0, 1).
+ *           A sane default is 0.35.
+ *     - Optional flags to control the estimation. Available flags are:
+ *           RHO_FLAG_NONE:
+ *               No special processing.
+ *           RHO_FLAG_ENABLE_NR:
+ *               Enable non-randomness criterion. If set, the beta parameter
+ *               must also be set.
+ *           RHO_FLAG_ENABLE_REFINEMENT:
+ *               Enable refinement of each new best model, as they are found.
+ *           RHO_FLAG_ENABLE_FINAL_REFINEMENT:
+ *               Enable one final refinement of the best model found before
+ *               returning it.
+ *
+ * The PROSAC estimator optionally accepts an extrinsic initial guess of H.
+ *
+ * The PROSAC estimator outputs a final estimate of H provided it was able to
+ * find one with a minimum of supporting inliers. If it was not, it outputs
+ * the all-zero matrix.
+ *
+ * The extrinsic guess at and final estimate of H are both in the same form:
+ * A 3x3 single-precision floating-point matrix with step 3. Thus, it is a
+ * 9-element array of floats, with the elements as follows:
+ *
+ *     [ H00, H01, H02,
+ *       H10, H11, H12,
+ *       H20, H21, 1.0 ]
+ *
+ * Notice that the homography is normalized to H22 = 1.0.
+ *
+ * The function returns the number of inliers if it was able to find a
+ * homography with at least the minimum required support, and 0 if it was not.
+ *
+ *
+ * @param [in/out] p       The context to use for homography estimation. Must
+ *                             be already initialized. Cannot be NULL.
+ * @param [in]     src     The pointer to the source points of the matches.
+ *                             Must be aligned to 4 bytes. Cannot be NULL.
+ * @param [in]     dst     The pointer to the destination points of the matches.
+ *                             Must be aligned to 4 bytes. Cannot be NULL.
+ * @param [out]    inl     The pointer to the output mask of inlier matches.
+ *                             Must be aligned to 4 bytes. May be NULL.
+ * @param [in]     N       The number of matches. Minimum 4.
+ * @param [in]     maxD    The maximum distance. Minimum 0.
+ * @param [in]     maxI    The maximum number of PROSAC iterations.
+ * @param [in]     rConvg  The RANSAC convergence parameter.
+ * @param [in]     cfd     The required confidence in the solution.
+ * @param [in]     minInl  The minimum required number of inliers. Minimum 4.
+ * @param [in]     beta    The beta-parameter for the non-randomness criterion.
+ * @param [in]     flags   A union of flags to fine-tune the estimation.
+ * @param [in]     guessH  An extrinsic guess at the solution H, or NULL if
+ *                         none provided.
+ * @param [out]    finalH  The final estimation of H, or the zero matrix if
+ *                         the minimum number of inliers was not met.
+ *                         Cannot be NULL.
+ * @return                 The number of inliers if the minimum number of
+ *                         inliers for acceptance was reached; 0 otherwise.
+ */
+
+unsigned rhoHest(Ptr<RHO_HEST> p,       /* Homography estimation context; must already be initialized. */
+                 const float*  src,     /* Source points: N (x,y) pairs, i.e. 2*N floats. */
+                 const float*  dst,     /* Destination points: N (x,y) pairs, i.e. 2*N floats. */
+                 char*         inl,     /* Output inlier mask of N bytes (non-zero = inlier); may be NULL. */
+                 unsigned      N,       /* Number of matches (>= 4): entries in inl, point pairs in src and dst. */
+                 float         maxD,    /* Max. reprojection distance for an inlier; suggested 3.0 */
+                 unsigned      maxI,    /* Max. PROSAC iterations; suggested 2000 */
+                 unsigned      rConvg,  /* Iterations after which sampling behaves like RANSAC; suggested 2000 */
+                 double        cfd,     /* Required confidence in [0, 1]; suggested 0.995 */
+                 unsigned      minInl,  /* Min. inliers to accept a model: at least 4; suggested 10% of N */
+                 double        beta,    /* Non-randomness beta in (0, 1); suggested 0.35; ignored if NR is disabled */
+                 unsigned      flags,   /* Union of RHO_FLAG_* bits; 0 (RHO_FLAG_NONE) for none */
+                 const float*  guessH,  /* Extrinsic guess (9 floats, row-major 3x3), NULL if none provided */
+                 float*        finalH); /* Final result (9 floats, row-major 3x3); all-zero if no model was accepted. */
+
+
+
+
+/* End Namespace cv */
+}
+
+
+
+
+#endif
diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp
index 129c10ee7..dd5c5eb8c 100644
--- a/modules/calib3d/src/solvepnp.cpp
+++ b/modules/calib3d/src/solvepnp.cpp
@@ -48,41 +48,43 @@
 #include "opencv2/calib3d/calib3d_c.h"
 
 #include <iostream>
-using namespace cv;
 
-bool cv::solvePnP( InputArray _opoints, InputArray _ipoints,
-                  InputArray _cameraMatrix, InputArray _distCoeffs,
-                  OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess, int flags )
+namespace cv
+{
+
+bool solvePnP( InputArray _opoints, InputArray _ipoints,
+               InputArray _cameraMatrix, InputArray _distCoeffs,
+               OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess, int flags )
 {
     Mat opoints = _opoints.getMat(), ipoints = _ipoints.getMat();
     int npoints = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F));
     CV_Assert( npoints >= 0 && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F)) );
     _rvec.create(3, 1, CV_64F);
     _tvec.create(3, 1, CV_64F);
-    Mat cameraMatrix = _cameraMatrix.getMat(), distCoeffs = _distCoeffs.getMat();
+    Mat cameraMatrix = Mat_<double>(_cameraMatrix.getMat()), distCoeffs = Mat_<double>(_distCoeffs.getMat());
 
-    if (flags == SOLVEPNP_EPNP)
+    if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP)
     {
-        cv::Mat undistortedPoints;
-        cv::undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
+        Mat undistortedPoints;
+        undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
         epnp PnP(cameraMatrix, opoints, undistortedPoints);
 
-        cv::Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
+        Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
         PnP.compute_pose(R, tvec);
-        cv::Rodrigues(R, rvec);
+        Rodrigues(R, rvec);
         return true;
     }
     else if (flags == SOLVEPNP_P3P)
     {
         CV_Assert( npoints == 4);
-        cv::Mat undistortedPoints;
-        cv::undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
+        Mat undistortedPoints;
+        undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
         p3p P3Psolver(cameraMatrix);
 
-        cv::Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
+        Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
         bool result = P3Psolver.solve(R, tvec, opoints, undistortedPoints);
         if (result)
-            cv::Rodrigues(R, rvec);
+            Rodrigues(R, rvec);
         return result;
     }
     else if (flags == SOLVEPNP_ITERATIVE)
@@ -95,32 +97,32 @@ bool cv::solvePnP( InputArray _opoints, InputArray _ipoints,
                                      &c_rvec, &c_tvec, useExtrinsicGuess );
         return true;
     }
-    else if (flags == SOLVEPNP_DLS)
+    /*else if (flags == SOLVEPNP_DLS)
     {
-        cv::Mat undistortedPoints;
-        cv::undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
+        Mat undistortedPoints;
+        undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
 
         dls PnP(opoints, undistortedPoints);
 
-        cv::Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
+        Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
         bool result = PnP.compute_pose(R, tvec);
         if (result)
-            cv::Rodrigues(R, rvec);
+            Rodrigues(R, rvec);
         return result;
     }
     else if (flags == SOLVEPNP_UPNP)
     {
         upnp PnP(cameraMatrix, opoints, ipoints);
 
-        cv::Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
+        Mat R, rvec = _rvec.getMat(), tvec = _tvec.getMat();
         double f = PnP.compute_pose(R, tvec);
-        cv::Rodrigues(R, rvec);
+        Rodrigues(R, rvec);
         if(cameraMatrix.type() == CV_32F)
             cameraMatrix.at<float>(0,0) = cameraMatrix.at<float>(1,1) = (float)f;
         else
             cameraMatrix.at<double>(0,0) = cameraMatrix.at<double>(1,1) = f;
         return true;
-    }
+    }*/
     else
         CV_Error(CV_StsBadArg, "The flags argument must be one of SOLVEPNP_ITERATIVE, SOLVEPNP_P3P, SOLVEPNP_EPNP or SOLVEPNP_DLS");
     return false;
@@ -131,7 +133,7 @@ class PnPRansacCallback : public PointSetRegistrator::Callback
 
 public:
 
-    PnPRansacCallback(Mat _cameraMatrix=Mat(3,3,CV_64F), Mat _distCoeffs=Mat(4,1,CV_64F), int _flags=cv::SOLVEPNP_ITERATIVE,
+    PnPRansacCallback(Mat _cameraMatrix=Mat(3,3,CV_64F), Mat _distCoeffs=Mat(4,1,CV_64F), int _flags=SOLVEPNP_ITERATIVE,
             bool _useExtrinsicGuess=false, Mat _rvec=Mat(), Mat _tvec=Mat() )
         : cameraMatrix(_cameraMatrix), distCoeffs(_distCoeffs), flags(_flags), useExtrinsicGuess(_useExtrinsicGuess),
           rvec(_rvec), tvec(_tvec) {}
@@ -142,12 +144,11 @@ public:
     {
         Mat opoints = _m1.getMat(), ipoints = _m2.getMat();
 
-
-        bool correspondence = cv::solvePnP( _m1, _m2, cameraMatrix, distCoeffs,
+        bool correspondence = solvePnP( _m1, _m2, cameraMatrix, distCoeffs,
                                             rvec, tvec, useExtrinsicGuess, flags );
 
         Mat _local_model;
-        cv::hconcat(rvec, tvec, _local_model);
+        hconcat(rvec, tvec, _local_model);
         _local_model.copyTo(_model);
 
         return correspondence;
@@ -166,7 +167,7 @@ public:
 
 
         Mat projpoints(count, 2, CV_32FC1);
-        cv::projectPoints(opoints, _rvec, _tvec, cameraMatrix, distCoeffs, projpoints);
+        projectPoints(opoints, _rvec, _tvec, cameraMatrix, distCoeffs, projpoints);
 
         const Point2f* ipoints_ptr = ipoints.ptr<Point2f>();
         const Point2f* projpoints_ptr = projpoints.ptr<Point2f>();
@@ -175,7 +176,7 @@ public:
         float* err = _err.getMat().ptr<float>();
 
         for ( i = 0; i < count; ++i)
-            err[i] = (float)cv::norm( ipoints_ptr[i] - projpoints_ptr[i] );
+            err[i] = (float)norm( ipoints_ptr[i] - projpoints_ptr[i] );
 
     }
 
@@ -188,7 +189,7 @@ public:
     Mat tvec;
 };
 
-bool cv::solvePnPRansac(InputArray _opoints, InputArray _ipoints,
+bool solvePnPRansac(InputArray _opoints, InputArray _ipoints,
                         InputArray _cameraMatrix, InputArray _distCoeffs,
                         OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess,
                         int iterationsCount, float reprojectionError, double confidence,
@@ -214,23 +215,45 @@ bool cv::solvePnPRansac(InputArray _opoints, InputArray _ipoints,
     Mat tvec = useExtrinsicGuess ? _tvec.getMat() : Mat(3, 1, CV_64FC1);
     Mat cameraMatrix = _cameraMatrix.getMat(), distCoeffs = _distCoeffs.getMat();
 
-    Ptr<PointSetRegistrator::Callback> cb; // pointer to callback
-    cb = makePtr<PnPRansacCallback>( cameraMatrix, distCoeffs, flags, useExtrinsicGuess, rvec, tvec);
+    int model_points = 5;
+    int ransac_kernel_method = SOLVEPNP_EPNP;
 
-    int model_points = 4;                             // minimum of number of model points
-    if( flags == cv::SOLVEPNP_ITERATIVE ) model_points = 6;
-    else if( flags == cv::SOLVEPNP_UPNP ) model_points = 6;
-    else if( flags == cv::SOLVEPNP_EPNP ) model_points = 5;
+    if( npoints == 4 )
+    {
+        model_points = 4;
+        ransac_kernel_method = SOLVEPNP_P3P;
+    }
+
+    Ptr<PointSetRegistrator::Callback> cb; // pointer to callback
+    cb = makePtr<PnPRansacCallback>( cameraMatrix, distCoeffs, ransac_kernel_method, useExtrinsicGuess, rvec, tvec);
 
     double param1 = reprojectionError;                // reprojection error
     double param2 = confidence;                       // confidence
     int param3 = iterationsCount;                     // number maximum iterations
 
-    cv::Mat _local_model(3, 2, CV_64FC1);
-    cv::Mat _mask_local_inliers(1, opoints.rows, CV_8UC1);
+    Mat _local_model(3, 2, CV_64FC1);
+    Mat _mask_local_inliers(1, opoints.rows, CV_8UC1);
 
     // call Ransac
-    int result = createRANSACPointSetRegistrator(cb, model_points, param1, param2, param3)->run(opoints, ipoints, _local_model, _mask_local_inliers);
+    int result = createRANSACPointSetRegistrator(cb, model_points,
+        param1, param2, param3)->run(opoints, ipoints, _local_model, _mask_local_inliers);
+
+    if( result > 0 )
+    {
+        vector<Point3d> opoints_inliers;
+        vector<Point2d> ipoints_inliers;
+        opoints.convertTo(opoints_inliers, CV_64F);
+        ipoints.convertTo(ipoints_inliers, CV_64F);
+
+        const uchar* mask = _mask_local_inliers.ptr<uchar>();
+        int npoints1 = compressElems(&opoints_inliers[0], mask, 1, npoints);
+        compressElems(&ipoints_inliers[0], mask, 1, npoints);
+
+        opoints_inliers.resize(npoints1);
+        ipoints_inliers.resize(npoints1);
+        result = solvePnP(opoints_inliers, ipoints_inliers, cameraMatrix,
+                          distCoeffs, rvec, tvec, false, flags == SOLVEPNP_P3P ? SOLVEPNP_EPNP : flags) ? 1 : -1;
+    }
 
     if( result <= 0 || _local_model.rows <= 0)
     {
@@ -260,3 +283,5 @@ bool cv::solvePnPRansac(InputArray _opoints, InputArray _ipoints,
     }
     return true;
 }
+
+}
diff --git a/modules/calib3d/test/test_homography.cpp b/modules/calib3d/test/test_homography.cpp
index 59d92905a..a31b75d2b 100644
--- a/modules/calib3d/test/test_homography.cpp
+++ b/modules/calib3d/test/test_homography.cpp
@@ -62,10 +62,10 @@
 
 #define MAX_COUNT_OF_POINTS 303
 #define COUNT_NORM_TYPES 3
-#define METHODS_COUNT 3
+#define METHODS_COUNT 4
 
 int NORM_TYPE[COUNT_NORM_TYPES] = {cv::NORM_L1, cv::NORM_L2, cv::NORM_INF};
-int METHOD[METHODS_COUNT] = {0, cv::RANSAC, cv::LMEDS};
+int METHOD[METHODS_COUNT] = {0, cv::RANSAC, cv::LMEDS, cv::RHO};
 
 using namespace cv;
 using namespace std;
@@ -94,12 +94,12 @@ private:
 
     void print_information_1(int j, int N, int method, const Mat& H);
     void print_information_2(int j, int N, int method, const Mat& H, const Mat& H_res, int k, double diff);
-    void print_information_3(int j, int N, const Mat& mask);
+    void print_information_3(int method, int j, int N, const Mat& mask);
     void print_information_4(int method, int j, int N, int k, int l, double diff);
     void print_information_5(int method, int j, int N, int l, double diff);
-    void print_information_6(int j, int N, int k, double diff, bool value);
-    void print_information_7(int j, int N, int k, double diff, bool original_value, bool found_value);
-    void print_information_8(int j, int N, int k, int l, double diff);
+    void print_information_6(int method, int j, int N, int k, double diff, bool value);
+    void print_information_7(int method, int j, int N, int k, double diff, bool original_value, bool found_value);
+    void print_information_8(int method, int j, int N, int k, int l, double diff);
 };
 
 CV_HomographyTest::CV_HomographyTest() : max_diff(1e-2f), max_2diff(2e-2f)
@@ -144,7 +144,7 @@ void CV_HomographyTest::print_information_1(int j, int N, int _method, const Mat
     cout << "Type of srcPoints: "; if ((j>-1) && (j<2)) cout << "Mat of CV_32FC2"; else  cout << "vector <Point2f>";
     cout << "   Type of dstPoints: "; if (j % 2 == 0) cout << "Mat of CV_32FC2"; else cout << "vector <Point2f>"; cout << endl;
     cout << "Count of points: " << N << endl; cout << endl;
-    cout << "Method: "; if (_method == 0) cout << 0; else if (_method == 8) cout << "RANSAC"; else cout << "LMEDS"; cout << endl;
+    cout << "Method: "; if (_method == 0) cout << 0; else if (_method == 8) cout << "RANSAC"; else if (_method == cv::RHO) cout << "RHO"; else cout << "LMEDS"; cout << endl;
     cout << "Homography matrix:" << endl; cout << endl;
     cout << H << endl; cout << endl;
     cout << "Number of rows: " << H.rows << "   Number of cols: " << H.cols << endl; cout << endl;
@@ -156,7 +156,7 @@ void CV_HomographyTest::print_information_2(int j, int N, int _method, const Mat
     cout << "Type of srcPoints: "; if ((j>-1) && (j<2)) cout << "Mat of CV_32FC2"; else  cout << "vector <Point2f>";
     cout << "   Type of dstPoints: "; if (j % 2 == 0) cout << "Mat of CV_32FC2"; else cout << "vector <Point2f>"; cout << endl;
     cout << "Count of points: " << N << endl; cout << endl;
-    cout << "Method: "; if (_method == 0) cout << 0; else if (_method == 8) cout << "RANSAC"; else cout << "LMEDS"; cout << endl;
+    cout << "Method: "; if (_method == 0) cout << 0; else if (_method == 8) cout << "RANSAC"; else if (_method == cv::RHO) cout << "RHO"; else cout << "LMEDS"; cout << endl;
     cout << "Original matrix:" << endl; cout << endl;
     cout << H << endl; cout << endl;
     cout << "Found matrix:" << endl; cout << endl;
@@ -166,13 +166,13 @@ void CV_HomographyTest::print_information_2(int j, int N, int _method, const Mat
     cout << "Maximum allowed difference: " << max_diff << endl; cout << endl;
 }
 
-void CV_HomographyTest::print_information_3(int j, int N, const Mat& mask)
+void CV_HomographyTest::print_information_3(int _method, int j, int N, const Mat& mask)
 {
     cout << endl; cout << "Checking for inliers/outliers mask..." << endl; cout << endl;
     cout << "Type of srcPoints: "; if ((j>-1) && (j<2)) cout << "Mat of CV_32FC2"; else  cout << "vector <Point2f>";
     cout << "   Type of dstPoints: "; if (j % 2 == 0) cout << "Mat of CV_32FC2"; else cout << "vector <Point2f>"; cout << endl;
     cout << "Count of points: " << N << endl; cout << endl;
-    cout << "Method: RANSAC" << endl;
+    cout << "Method: "; if (_method == RANSAC) cout << "RANSAC" << endl; else if (_method == cv::RHO) cout << "RHO" << endl; else cout << _method << endl;
     cout << "Found mask:" << endl; cout << endl;
     cout << mask << endl; cout << endl;
     cout << "Number of rows: " << mask.rows << "   Number of cols: " << mask.cols << endl; cout << endl;
@@ -205,10 +205,10 @@ void CV_HomographyTest::print_information_5(int _method, int j, int N, int l, do
     cout << "Maxumum allowed difference: " << max_diff << endl; cout << endl;
 }
 
-void CV_HomographyTest::print_information_6(int j, int N, int k, double diff, bool value)
+void CV_HomographyTest::print_information_6(int _method, int j, int N, int k, double diff, bool value)
 {
     cout << endl; cout << "Checking for inliers/outliers mask..." << endl; cout << endl;
-    cout << "Method: RANSAC" << endl;
+    cout << "Method: "; if (_method == RANSAC) cout << "RANSAC" << endl; else if (_method == cv::RHO) cout << "RHO" << endl; else cout << _method << endl;
     cout << "Type of srcPoints: "; if ((j>-1) && (j<2)) cout << "Mat of CV_32FC2"; else  cout << "vector <Point2f>";
     cout << "   Type of dstPoints: "; if (j % 2 == 0) cout << "Mat of CV_32FC2"; else cout << "vector <Point2f>"; cout << endl;
     cout << "Count of points: " << N << "   " << endl;
@@ -218,10 +218,10 @@ void CV_HomographyTest::print_information_6(int j, int N, int k, double diff, bo
     cout << "Value of found mask: "<< value << endl; cout << endl;
 }
 
-void CV_HomographyTest::print_information_7(int j, int N, int k, double diff, bool original_value, bool found_value)
+void CV_HomographyTest::print_information_7(int _method, int j, int N, int k, double diff, bool original_value, bool found_value)
 {
     cout << endl; cout << "Checking for inliers/outliers mask..." << endl; cout << endl;
-    cout << "Method: RANSAC" << endl;
+    cout << "Method: "; if (_method == RANSAC) cout << "RANSAC" << endl; else if (_method == cv::RHO) cout << "RHO" << endl; else cout << _method << endl;
     cout << "Type of srcPoints: "; if ((j>-1) && (j<2)) cout << "Mat of CV_32FC2"; else  cout << "vector <Point2f>";
     cout << "   Type of dstPoints: "; if (j % 2 == 0) cout << "Mat of CV_32FC2"; else cout << "vector <Point2f>"; cout << endl;
     cout << "Count of points: " << N << "   " << endl;
@@ -231,10 +231,10 @@ void CV_HomographyTest::print_information_7(int j, int N, int k, double diff, bo
     cout << "Value of original mask: "<< original_value << "   Value of found mask: " << found_value << endl; cout << endl;
 }
 
-void CV_HomographyTest::print_information_8(int j, int N, int k, int l, double diff)
+void CV_HomographyTest::print_information_8(int _method, int j, int N, int k, int l, double diff)
 {
     cout << endl; cout << "Checking for reprojection error of inlier..." << endl; cout << endl;
-    cout << "Method: RANSAC" << endl;
+    cout << "Method: "; if (_method == RANSAC) cout << "RANSAC" << endl; else if (_method == cv::RHO) cout << "RHO" << endl; else cout << _method << endl;
     cout << "Sigma of normal noise: " << sigma << endl;
     cout << "Type of srcPoints: "; if ((j>-1) && (j<2)) cout << "Mat of CV_32FC2"; else  cout << "vector <Point2f>";
     cout << "   Type of dstPoints: "; if (j % 2 == 0) cout << "Mat of CV_32FC2"; else cout << "vector <Point2f>"; cout << endl;
@@ -339,14 +339,15 @@ void CV_HomographyTest::run(int)
 
                     continue;
                 }
+            case cv::RHO:
             case RANSAC:
                 {
                     cv::Mat mask [4]; double diff;
 
-                    Mat H_res_64 [4] = { cv::findHomography(src_mat_2f, dst_mat_2f, RANSAC, reproj_threshold, mask[0]),
-                                         cv::findHomography(src_mat_2f, dst_vec, RANSAC, reproj_threshold, mask[1]),
-                                         cv::findHomography(src_vec, dst_mat_2f, RANSAC, reproj_threshold, mask[2]),
-                                         cv::findHomography(src_vec, dst_vec, RANSAC, reproj_threshold, mask[3]) };
+                    Mat H_res_64 [4] = { cv::findHomography(src_mat_2f, dst_mat_2f, method, reproj_threshold, mask[0]),
+                                         cv::findHomography(src_mat_2f, dst_vec, method, reproj_threshold, mask[1]),
+                                         cv::findHomography(src_vec, dst_mat_2f, method, reproj_threshold, mask[2]),
+                                         cv::findHomography(src_vec, dst_vec, method, reproj_threshold, mask[3]) };
 
                     for (int j = 0; j < 4; ++j)
                     {
@@ -370,7 +371,7 @@ void CV_HomographyTest::run(int)
 
                         if (code)
                         {
-                            print_information_3(j, N, mask[j]);
+                            print_information_3(method, j, N, mask[j]);
 
                             switch (code)
                             {
@@ -466,14 +467,15 @@ void CV_HomographyTest::run(int)
 
                     continue;
                 }
+            case cv::RHO:
             case RANSAC:
                 {
                     cv::Mat mask_res [4];
 
-                    Mat H_res_64 [4] = { cv::findHomography(src_mat_2f, dst_mat_2f, RANSAC, reproj_threshold, mask_res[0]),
-                                         cv::findHomography(src_mat_2f, dst_vec, RANSAC, reproj_threshold, mask_res[1]),
-                                         cv::findHomography(src_vec, dst_mat_2f, RANSAC, reproj_threshold, mask_res[2]),
-                                         cv::findHomography(src_vec, dst_vec, RANSAC, reproj_threshold, mask_res[3]) };
+                    Mat H_res_64 [4] = { cv::findHomography(src_mat_2f, dst_mat_2f, method, reproj_threshold, mask_res[0]),
+                                         cv::findHomography(src_mat_2f, dst_vec, method, reproj_threshold, mask_res[1]),
+                                         cv::findHomography(src_vec, dst_mat_2f, method, reproj_threshold, mask_res[2]),
+                                         cv::findHomography(src_vec, dst_vec, method, reproj_threshold, mask_res[3]) };
 
                     for (int j = 0; j < 4; ++j)
                     {
@@ -488,7 +490,7 @@ void CV_HomographyTest::run(int)
 
                         if (code)
                         {
-                            print_information_3(j, N, mask_res[j]);
+                            print_information_3(method, j, N, mask_res[j]);
 
                             switch (code)
                             {
@@ -520,14 +522,14 @@ void CV_HomographyTest::run(int)
 
                             if (mask_res[j].at<bool>(k, 0) != (diff <= reproj_threshold))
                             {
-                                print_information_6(j, N, k, diff, mask_res[j].at<bool>(k, 0));
+                                print_information_6(method, j, N, k, diff, mask_res[j].at<bool>(k, 0));
                                 CV_Error(CALIB3D_HOMOGRAPHY_ERROR_RANSAC_MASK, MESSAGE_RANSAC_MASK_4);
                                 return;
                             }
 
                             if (mask.at<bool>(k, 0) && !mask_res[j].at<bool>(k, 0))
                             {
-                                print_information_7(j, N, k, diff, mask.at<bool>(k, 0), mask_res[j].at<bool>(k, 0));
+                                print_information_7(method, j, N, k, diff, mask.at<bool>(k, 0), mask_res[j].at<bool>(k, 0));
                                 CV_Error(CALIB3D_HOMOGRAPHY_ERROR_RANSAC_MASK, MESSAGE_RANSAC_MASK_5);
                                 return;
                             }
@@ -547,7 +549,7 @@ void CV_HomographyTest::run(int)
 
                                     if (diff - cv::norm(noise_2d, NORM_TYPE[l]) > max_2diff)
                                     {
-                                        print_information_8(j, N, k, l, diff - cv::norm(noise_2d, NORM_TYPE[l]));
+                                        print_information_8(method, j, N, k, l, diff - cv::norm(noise_2d, NORM_TYPE[l]));
                                         CV_Error(CALIB3D_HOMOGRAPHY_ERROR_RANSAC_DIFF, MESSAGE_RANSAC_DIFF);
                                         return;
                                     }
diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp
index 896aa12bf..2a51de22e 100644
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@@ -985,22 +985,131 @@ horizontal axis.
   */
 CV_EXPORTS Mat repeat(const Mat& src, int ny, int nx);
 
-/** @brief concatenate matrices horizontally
-@todo document
+/** @brief Applies horizontal concatenation to given matrices.
+
+The function horizontally concatenates two or more cv::Mat matrices (with the same number of rows).
+@code{.cpp}
+    cv::Mat matArray[] = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)),
+                           cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)),
+                           cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::hconcat( matArray, 3, out );
+    //out:
+    //[1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3]
+@endcode
+@param src input array or vector of matrices. All of the matrices must have the same number of rows and the same depth.
+@param nsrc number of matrices in src.
+@param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src.
+@sa cv::vconcat(const Mat*, size_t, OutputArray), cv::vconcat(InputArrayOfArrays, OutputArray) and cv::vconcat(InputArray, InputArray, OutputArray)
 */
 CV_EXPORTS void hconcat(const Mat* src, size_t nsrc, OutputArray dst);
-/** @overload */
+/** @overload
+ @code{.cpp}
+    cv::Mat_<float> A = (cv::Mat_<float>(3, 2) << 1, 4,
+                                                  2, 5,
+                                                  3, 6);
+    cv::Mat_<float> B = (cv::Mat_<float>(3, 2) << 7, 10,
+                                                  8, 11,
+                                                  9, 12);
+
+    cv::Mat C;
+    cv::hconcat(A, B, C);
+    //C:
+    //[1, 4, 7, 10;
+    // 2, 5, 8, 11;
+    // 3, 6, 9, 12]
+ @endcode
+ @param src1 first input array to be considered for horizontal concatenation.
+ @param src2 second input array to be considered for horizontal concatenation.
+ @param dst output array. It has the same number of rows and depth as the src1 and src2, and the sum of cols of the src1 and src2.
+ */
 CV_EXPORTS void hconcat(InputArray src1, InputArray src2, OutputArray dst);
-/** @overload */
+/** @overload
+ @code{.cpp}
+    std::vector<cv::Mat> matrices = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)),
+                                      cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)),
+                                      cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::hconcat( matrices, out );
+    //out:
+    //[1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3]
+ @endcode
+ @param src input array or vector of matrices. All of the matrices must have the same number of rows and
+ the same depth.
+ @param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src.
+ */
 CV_EXPORTS_W void hconcat(InputArrayOfArrays src, OutputArray dst);
 
-/** @brief concatenate matrices vertically
-@todo document
+/** @brief Applies vertical concatenation to given matrices.
+
+The function vertically concatenates two or more cv::Mat matrices (with the same number of cols).
+@code{.cpp}
+    cv::Mat matArray[] = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)),
+                           cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)),
+                           cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::vconcat( matArray, 3, out );
+    //out:
+    //[1,   1,   1,   1;
+    // 2,   2,   2,   2;
+    // 3,   3,   3,   3]
+@endcode
+@param src input array or vector of matrices. All of the matrices must have the same number of cols and the same depth.
+@param nsrc number of matrices in src.
+@param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src.
+@sa cv::hconcat(const Mat*, size_t, OutputArray), cv::hconcat(InputArrayOfArrays, OutputArray) and cv::hconcat(InputArray, InputArray, OutputArray)
 */
 CV_EXPORTS void vconcat(const Mat* src, size_t nsrc, OutputArray dst);
-/** @overload */
+/** @overload
+ @code{.cpp}
+    cv::Mat_<float> A = (cv::Mat_<float>(3, 2) << 1, 7,
+                                                  2, 8,
+                                                  3, 9);
+    cv::Mat_<float> B = (cv::Mat_<float>(3, 2) << 4, 10,
+                                                  5, 11,
+                                                  6, 12);
+
+    cv::Mat C;
+    cv::vconcat(A, B, C);
+    //C:
+    //[1, 7;
+    // 2, 8;
+    // 3, 9;
+    // 4, 10;
+    // 5, 11;
+    // 6, 12]
+ @endcode
+ @param src1 first input array to be considered for vertical concatenation.
+ @param src2 second input array to be considered for vertical concatenation.
+ @param dst output array. It has the same number of cols and depth as the src1 and src2, and the sum of rows of the src1 and src2.
+ */
 CV_EXPORTS void vconcat(InputArray src1, InputArray src2, OutputArray dst);
-/** @overload */
+/** @overload
+ @code{.cpp}
+    std::vector<cv::Mat> matrices = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)),
+                                      cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)),
+                                      cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::vconcat( matrices, out );
+    //out:
+    //[1,   1,   1,   1;
+    // 2,   2,   2,   2;
+    // 3,   3,   3,   3]
+ @endcode
+ @param src input array or vector of matrices. All of the matrices must have the same number of cols and
+ the same depth.
+ @param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src.
+ */
 CV_EXPORTS_W void vconcat(InputArrayOfArrays src, OutputArray dst);
 
 /** @brief computes bitwise conjunction of the two arrays (dst = src1 & src2)
@@ -2821,41 +2930,6 @@ public:
     virtual void read(const FileNode& fn) { (void)fn; }
 };
 
-// define properties
-
-#define CV_PURE_PROPERTY(type, name) \
-    CV_WRAP virtual type get##name() const = 0; \
-    CV_WRAP virtual void set##name(type val) = 0;
-
-#define CV_PURE_PROPERTY_S(type, name) \
-    CV_WRAP virtual type get##name() const = 0; \
-    CV_WRAP virtual void set##name(const type & val) = 0;
-
-#define CV_PURE_PROPERTY_RO(type, name) \
-    CV_WRAP virtual type get##name() const = 0;
-
-// basic property implementation
-
-#define CV_IMPL_PROPERTY_RO(type, name, member) \
-    inline type get##name() const { return member; }
-
-#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \
-    CV_IMPL_PROPERTY_RO(r_type, name, member) \
-    inline void set##name(w_type val) { member = val; }
-
-#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \
-    r_type get##name() const { return internal_obj.get##internal_name(); } \
-    void set##name(w_type val) { internal_obj.set##internal_name(val); }
-
-#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member)
-#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member)
-
-#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj)  CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj)
-#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj)
-
-#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj)
-#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj)
-
 struct Param {
     enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
            UNSIGNED_INT=8, UINT64=9, UCHAR=11 };
diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index f2acaa3fb..73beb911f 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v)      { return _Tp(
 template<typename _Tp> static inline _Tp saturate_cast(float v)    { return _Tp(v); }
 /** @overload */
 template<typename _Tp> static inline _Tp saturate_cast(double v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int64 v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(uint64 v)   { return _Tp(v); }
 
 //! @cond IGNORED
 
@@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v)        { return saturate_c
 template<> inline uchar saturate_cast<uchar>(unsigned v)     { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
 template<> inline uchar saturate_cast<uchar>(float v)        { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
 template<> inline uchar saturate_cast<uchar>(double v)       { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(int64 v)        { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(uint64 v)       { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
 
 template<> inline schar saturate_cast<schar>(uchar v)        { return (schar)std::min((int)v, SCHAR_MAX); }
 template<> inline schar saturate_cast<schar>(ushort v)       { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
@@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v)        { return saturate_c
 template<> inline schar saturate_cast<schar>(unsigned v)     { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
 template<> inline schar saturate_cast<schar>(float v)        { int iv = cvRound(v); return saturate_cast<schar>(iv); }
 template<> inline schar saturate_cast<schar>(double v)       { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(int64 v)        { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(uint64 v)       { return (schar)std::min(v, (uint64)SCHAR_MAX); }
 
 template<> inline ushort saturate_cast<ushort>(schar v)      { return (ushort)std::max((int)v, 0); }
 template<> inline ushort saturate_cast<ushort>(short v)      { return (ushort)std::max((int)v, 0); }
@@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v)        { return (ushort)((
 template<> inline ushort saturate_cast<ushort>(unsigned v)   { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
 template<> inline ushort saturate_cast<ushort>(float v)      { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
 template<> inline ushort saturate_cast<ushort>(double v)     { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(int64 v)      { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(uint64 v)     { return (ushort)std::min(v, (uint64)USHRT_MAX); }
 
 template<> inline short saturate_cast<short>(ushort v)       { return (short)std::min((int)v, SHRT_MAX); }
 template<> inline short saturate_cast<short>(int v)          { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
 template<> inline short saturate_cast<short>(unsigned v)     { return (short)std::min(v, (unsigned)SHRT_MAX); }
 template<> inline short saturate_cast<short>(float v)        { int iv = cvRound(v); return saturate_cast<short>(iv); }
 template<> inline short saturate_cast<short>(double v)       { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(int64 v)        { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(uint64 v)       { return (short)std::min(v, (uint64)SHRT_MAX); }
 
 template<> inline int saturate_cast<int>(float v)            { return cvRound(v); }
 template<> inline int saturate_cast<int>(double v)           { return cvRound(v); }
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index 892c59d3b..3498b0918 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -480,16 +480,14 @@ CV_INLINE int cvRound( double value )
         fistp t;
     }
     return t;
-#elif defined _MSC_VER && defined _M_ARM && defined HAVE_TEGRA_OPTIMIZATION
-    TEGRA_ROUND(value);
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+    TEGRA_ROUND_DBL(value);
 #elif defined CV_ICC || defined __GNUC__
-#  ifdef HAVE_TEGRA_OPTIMIZATION
-    TEGRA_ROUND(value);
-#  elif CV_VFP
+# if CV_VFP
     ARM_ROUND_DBL(value)
-#  else
+# else
     return (int)lrint(value);
-#  endif
+# endif
 #else
     double intpart, fractpart;
     fractpart = modf(value, &intpart);
@@ -505,7 +503,9 @@ CV_INLINE int cvRound( double value )
 /** @overload */
 CV_INLINE int cvRound(float value)
 {
-#if CV_VFP && !defined HAVE_TEGRA_OPTIMIZATION
+#if defined ANDROID && (defined CV_ICC || defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+    TEGRA_ROUND_FLT(value);
+#elif CV_VFP && !defined HAVE_TEGRA_OPTIMIZATION
     ARM_ROUND_FLT(value)
 #else
     return cvRound((double)value);
diff --git a/modules/core/include/opencv2/core/cvstd.hpp b/modules/core/include/opencv2/core/cvstd.hpp
index 0da8faf3d..a229f533e 100644
--- a/modules/core/include/opencv2/core/cvstd.hpp
+++ b/modules/core/include/opencv2/core/cvstd.hpp
@@ -1040,9 +1040,11 @@ static inline bool operator>= (const String& lhs, const char*   rhs) { return lh
 
 #ifndef OPENCV_NOSTL_TRANSITIONAL
 namespace std
+{
+    static inline void swap(cv::String& a, cv::String& b) { a.swap(b); }
+}
 #else
 namespace cv
-#endif
 {
     template<> inline
     void swap<cv::String>(cv::String& a, cv::String& b)
@@ -1050,6 +1052,7 @@ namespace cv
         a.swap(b);
     }
 }
+#endif
 
 #include "opencv2/core/ptr.inl.hpp"
 
diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp
index fd691cc6c..eec304d6b 100644
--- a/modules/core/include/opencv2/core/persistence.hpp
+++ b/modules/core/include/opencv2/core/persistence.hpp
@@ -257,7 +257,7 @@ Here is how to read the file created by the code sample above:
             cout << " " << (int)lbpval[i];
         cout << ")" << endl;
     }
-    fs.release();
+    fs2.release();
 @endcode
 
 Format specification    {#format_spec}
diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp
index 03d69bd43..58d78e584 100644
--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@@ -172,6 +172,27 @@ namespace cv
 CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int unroll_to = 0);
 }
 
+// property implementation macros
+
+#define CV_IMPL_PROPERTY_RO(type, name, member) \
+    inline type get##name() const { return member; }
+
+#define CV_HELP_IMPL_PROPERTY(r_type, w_type, name, member) \
+    CV_IMPL_PROPERTY_RO(r_type, name, member) \
+    inline void set##name(w_type val) { member = val; }
+
+#define CV_HELP_WRAP_PROPERTY(r_type, w_type, name, internal_name, internal_obj) \
+    r_type get##name() const { return internal_obj.get##internal_name(); } \
+    void set##name(w_type val) { internal_obj.set##internal_name(val); }
+
+#define CV_IMPL_PROPERTY(type, name, member) CV_HELP_IMPL_PROPERTY(type, type, name, member)
+#define CV_IMPL_PROPERTY_S(type, name, member) CV_HELP_IMPL_PROPERTY(type, const type &, name, member)
+
+#define CV_WRAP_PROPERTY(type, name, internal_name, internal_obj)  CV_HELP_WRAP_PROPERTY(type, type, name, internal_name, internal_obj)
+#define CV_WRAP_PROPERTY_S(type, name, internal_name, internal_obj) CV_HELP_WRAP_PROPERTY(type, const type &, name, internal_name, internal_obj)
+
+#define CV_WRAP_SAME_PROPERTY(type, name, internal_obj) CV_WRAP_PROPERTY(type, name, name, internal_obj)
+#define CV_WRAP_SAME_PROPERTY_S(type, name, internal_obj) CV_WRAP_PROPERTY_S(type, name, name, internal_obj)
 
 /****************************************************************************************\
 *                     Structures and macros for integration with IPP                     *
diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h
index e82470180..16e613053 100644
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@@ -133,7 +133,7 @@ typedef int CVStatus;
 
 /** @see cv::Error::Code */
 enum {
- CV_StsOk=                       0,  /**< everithing is ok                */
+ CV_StsOk=                       0,  /**< everything is ok                */
  CV_StsBackTrace=               -1,  /**< pseudo error for back trace     */
  CV_StsError=                   -2,  /**< unknown /unspecified error      */
  CV_StsInternal=                -3,  /**< internal error (bad state)      */
@@ -143,28 +143,28 @@ enum {
  CV_StsNoConv=                  -7,  /**< iter. didn't converge           */
  CV_StsAutoTrace=               -8,  /**< tracing                         */
  CV_HeaderIsNull=               -9,  /**< image header is NULL            */
- CV_BadImageSize=              -10, /**< image size is invalid           */
- CV_BadOffset=                 -11, /**< offset is invalid               */
- CV_BadDataPtr=                -12, /**/
- CV_BadStep=                   -13, /**/
- CV_BadModelOrChSeq=           -14, /**/
- CV_BadNumChannels=            -15, /**/
- CV_BadNumChannel1U=           -16, /**/
- CV_BadDepth=                  -17, /**/
- CV_BadAlphaChannel=           -18, /**/
- CV_BadOrder=                  -19, /**/
- CV_BadOrigin=                 -20, /**/
- CV_BadAlign=                  -21, /**/
- CV_BadCallBack=               -22, /**/
- CV_BadTileSize=               -23, /**/
- CV_BadCOI=                    -24, /**/
- CV_BadROISize=                -25, /**/
- CV_MaskIsTiled=               -26, /**/
- CV_StsNullPtr=                -27, /**< null pointer */
- CV_StsVecLengthErr=           -28, /**< incorrect vector length */
- CV_StsFilterStructContentErr= -29, /**< incorr. filter structure content */
- CV_StsKernelStructContentErr= -30, /**< incorr. transform kernel content */
- CV_StsFilterOffsetErr=        -31, /**< incorrect filter offset value */
+ CV_BadImageSize=              -10,  /**< image size is invalid           */
+ CV_BadOffset=                 -11,  /**< offset is invalid               */
+ CV_BadDataPtr=                -12,  /**/
+ CV_BadStep=                   -13,  /**/
+ CV_BadModelOrChSeq=           -14,  /**/
+ CV_BadNumChannels=            -15,  /**/
+ CV_BadNumChannel1U=           -16,  /**/
+ CV_BadDepth=                  -17,  /**/
+ CV_BadAlphaChannel=           -18,  /**/
+ CV_BadOrder=                  -19,  /**/
+ CV_BadOrigin=                 -20,  /**/
+ CV_BadAlign=                  -21,  /**/
+ CV_BadCallBack=               -22,  /**/
+ CV_BadTileSize=               -23,  /**/
+ CV_BadCOI=                    -24,  /**/
+ CV_BadROISize=                -25,  /**/
+ CV_MaskIsTiled=               -26,  /**/
+ CV_StsNullPtr=                -27,  /**< null pointer */
+ CV_StsVecLengthErr=           -28,  /**< incorrect vector length */
+ CV_StsFilterStructContentErr= -29,  /**< incorr. filter structure content */
+ CV_StsKernelStructContentErr= -30,  /**< incorr. transform kernel content */
+ CV_StsFilterOffsetErr=        -31,  /**< incorrect filter offset value */
  CV_StsBadSize=                -201, /**< the input/output structure size is incorrect  */
  CV_StsDivByZero=              -202, /**< division by zero */
  CV_StsInplaceNotSupported=    -203, /**< in-place operation is not supported */
diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index f89560a80..e6dfd7a4f 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -61,7 +61,7 @@ CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation a
 // Each implementation entry correspond to function name entry, so you can find which implementation was executed in which fucntion
 CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
 
-CV_EXPORTS bool useCollection(); // return implementation colelction state
+CV_EXPORTS bool useCollection(); // return implementation collection state
 CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
 
 #define CV_IMPL_PLAIN  0x01 // native CPU OpenCV implementation
diff --git a/modules/core/perf/perf_cvround.cpp b/modules/core/perf/perf_cvround.cpp
new file mode 100644
index 000000000..e9db32354
--- /dev/null
+++ b/modules/core/perf/perf_cvround.cpp
@@ -0,0 +1,45 @@
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace perf;
+using std::tr1::make_tuple;
+using std::tr1::get;
+
+template <typename T>
+static void CvRoundMat(const cv::Mat & src, cv::Mat & dst)
+{
+    for (int y = 0; y < dst.rows; ++y)
+    {
+        const T * sptr = src.ptr<T>(y);
+        int * dptr = dst.ptr<int>(y);
+
+        for (int x = 0; x < dst.cols; ++x)
+            dptr[x] = cvRound(sptr[x]);
+    }
+}
+
+PERF_TEST_P(Size_MatType, CvRound_Float,
+            testing::Combine(testing::Values(TYPICAL_MAT_SIZES),
+                             testing::Values(CV_32FC1, CV_64FC1)))
+{
+    Size size = get<0>(GetParam());
+    int type = get<1>(GetParam()), depth = CV_MAT_DEPTH(type);
+
+    cv::Mat src(size, type), dst(size, CV_32SC1);
+
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (depth == CV_32F)
+    {
+        TEST_CYCLE()
+            CvRoundMat<float>(src, dst);
+    }
+    else if (depth == CV_64F)
+    {
+        TEST_CYCLE()
+            CvRoundMat<double>(src, dst);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp
index b3cde84df..129127820 100644
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -2449,7 +2449,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
         (DFTFunc)CCSIDFT_64f
     };
     AutoBuffer<uchar> buf;
-    void *spec = 0;
     Mat src0 = _src0.getMat(), src = src0;
     int prev_len = 0, stage = 0;
     bool inv = (flags & DFT_INVERSE) != 0;
@@ -2570,7 +2569,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
             sz = 2*len*complex_elem_size;
         }
 
-        spec = 0;
+        void *spec = 0;
 #ifdef USE_IPP_DFT
         if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
         {
diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp
index 6cfd5baa8..a766e5f2e 100644
--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@@ -725,11 +725,11 @@ template<typename T1, typename T2, typename T3> static void
 MatrAXPY( int m, int n, const T1* x, int dx,
          const T2* a, int inca, T3* y, int dy )
 {
-    int i, j;
+    int i;
     for( i = 0; i < m; i++, x += dx, y += dy )
     {
         T2 s = a[i*inca];
-        j=0;
+        int j = 0;
          #if CV_ENABLE_UNROLLED
         for(; j <= n - 4; j += 4 )
         {
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 6a36ddbae..5d68a3683 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -3957,6 +3957,7 @@ public:
             derived()._releaseBufferEntry(entry);
         }
         reservedEntries_.clear();
+        currentReservedSize = 0;
     }
 };
 
diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp
index 4dde2adea..582c7cd03 100644
--- a/modules/core/src/parallel.cpp
+++ b/modules/core/src/parallel.cpp
@@ -69,7 +69,7 @@
     #define HAVE_GCD
 #endif
 
-#if defined _MSC_VER && _MSC_VER >= 1600 && !defined(WINRT)
+#if defined _MSC_VER && _MSC_VER >= 1600
     #define HAVE_CONCURRENCY
 #endif
 
@@ -78,7 +78,8 @@
    2. HAVE_CSTRIPES    - 3rdparty library, should be explicitly enabled
    3. HAVE_OPENMP      - integrated to compiler, should be explicitly enabled
    4. HAVE_GCD         - system wide, used automatically        (APPLE only)
-   5. HAVE_CONCURRENCY - part of runtime, used automatically    (Windows only - MSVS 10, MSVS 11)
+   5. WINRT            - system wide, used automatically        (Windows RT only)
+   6. HAVE_CONCURRENCY - part of runtime, used automatically    (Windows only - MSVS 10, MSVS 11)
 */
 
 #if defined HAVE_TBB
@@ -105,6 +106,8 @@
     #elif defined HAVE_GCD
         #include <dispatch/dispatch.h>
         #include <pthread.h>
+    #elif defined WINRT
+        #include <ppltasks.h>
     #elif defined HAVE_CONCURRENCY
         #include <ppl.h>
     #endif
@@ -118,6 +121,8 @@
 #  define CV_PARALLEL_FRAMEWORK "openmp"
 #elif defined HAVE_GCD
 #  define CV_PARALLEL_FRAMEWORK "gcd"
+#elif defined WINRT
+#  define CV_PARALLEL_FRAMEWORK "winrt-concurrency"
 #elif defined HAVE_CONCURRENCY
 #  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
 #endif
@@ -179,7 +184,7 @@ namespace
         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
         (*ptr_body)(cv::Range((int)index, (int)index + 1));
     }
-#elif defined HAVE_CONCURRENCY
+#elif defined WINRT || defined HAVE_CONCURRENCY
     class ProxyLoopBody : public ParallelLoopBodyWrapper
     {
     public:
@@ -206,7 +211,10 @@ static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred)
 static int numThreadsMax = omp_get_max_threads();
 #elif defined HAVE_GCD
 // nothing for GCD
+#elif defined WINRT
+// nothing for WINRT
 #elif defined HAVE_CONCURRENCY
+
 class SchedPtr
 {
     Concurrency::Scheduler* sched_;
@@ -224,6 +232,7 @@ public:
     ~SchedPtr() { *this = 0; }
 };
 static SchedPtr pplScheduler;
+
 #endif
 
 #endif // CV_PARALLEL_FRAMEWORK
@@ -272,6 +281,10 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body,
         dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
         dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
 
+#elif defined WINRT
+
+        Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
+
 #elif defined HAVE_CONCURRENCY
 
         if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
@@ -330,11 +343,15 @@ int cv::getNumThreads(void)
 
     return 512; // the GCD thread pool limit
 
+#elif defined WINRT
+
+    return 0;
+
 #elif defined HAVE_CONCURRENCY
 
     return 1 + (pplScheduler == 0
-                ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
-                : pplScheduler->GetNumberOfVirtualProcessors());
+        ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
+        : pplScheduler->GetNumberOfVirtualProcessors());
 
 #else
 
@@ -371,6 +388,10 @@ void cv::setNumThreads( int threads )
     // unsupported
     // there is only private dispatch_queue_set_width() and only for desktop
 
+#elif defined WINRT
+
+    return;
+
 #elif defined HAVE_CONCURRENCY
 
     if (threads <= 0)
@@ -407,6 +428,8 @@ int cv::getThreadNum(void)
     return omp_get_thread_num();
 #elif defined HAVE_GCD
     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
+#elif defined WINRT
+    return 0;
 #elif defined HAVE_CONCURRENCY
     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
 #else
diff --git a/modules/core/src/pca.cpp b/modules/core/src/pca.cpp
index 95efd5718..85ba44324 100644
--- a/modules/core/src/pca.cpp
+++ b/modules/core/src/pca.cpp
@@ -66,7 +66,7 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp
 {
     Mat data = _data.getMat(), _mean = __mean.getMat();
     int covar_flags = CV_COVAR_SCALE;
-    int i, len, in_count;
+    int len, in_count;
     Size mean_sz;
 
     CV_Assert( data.channels() == 1 );
@@ -131,6 +131,7 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp
         eigenvectors = evects1;
 
         // normalize eigenvectors
+        int i;
         for( i = 0; i < out_count; i++ )
         {
             Mat vec = eigenvectors.row(i);
@@ -202,7 +203,7 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, double reta
 {
     Mat data = _data.getMat(), _mean = __mean.getMat();
     int covar_flags = CV_COVAR_SCALE;
-    int i, len, in_count;
+    int len, in_count;
     Size mean_sz;
 
     CV_Assert( data.channels() == 1 );
@@ -266,6 +267,7 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, double reta
         eigenvectors = evects1;
 
         // normalize all eigenvectors
+        int i;
         for( i = 0; i < eigenvectors.rows; i++ )
         {
             Mat vec = eigenvectors.row(i);
diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp
index a57849012..fa6120371 100644
--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
@@ -232,15 +232,30 @@ inline bool checkScalar(InputArray sc, int atype, int sckind, int akind)
 
 void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize );
 
+#ifdef CV_COLLECT_IMPL_DATA
+// Record of the implementation flags/functions reported through setImpl()/addImpl().
+struct ImplCollector
+{
+    ImplCollector() : useCollection(false), implFlags(0)
+    {
+    }
+
+    bool useCollection; // enable/disable impl data collection
+
+    int implFlags;                // replaced by setImpl(), OR-ed into by addImpl()
+    std::vector<int>    implCode; // flag stored by addImpl() when a function name is given
+    std::vector<String> implFun;  // function name stored alongside each implCode entry
+
+    cv::Mutex mutex; // locked by setImpl()/addImpl()/getImpl()/setUseCollection()
+};
+#endif
+
 struct CoreTLSData
 {
-    CoreTLSData() : device(0), useOpenCL(-1), useIPP(-1), useCollection(false)
+    CoreTLSData() : device(0), useOpenCL(-1), useIPP(-1)
     {
 #ifdef HAVE_TEGRA_OPTIMIZATION
         useTegra = -1;
-#endif
-#ifdef CV_COLLECT_IMPL_DATA
-        implFlags = 0;
 #endif
     }
 
@@ -251,13 +266,6 @@ struct CoreTLSData
     int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
 #ifdef HAVE_TEGRA_OPTIMIZATION
     int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized
-#endif
-    bool useCollection; // enable/disable impl data collection
-
-#ifdef CV_COLLECT_IMPL_DATA
-    int implFlags;
-    std::vector<int> implCode;
-    std::vector<String> implFun;
 #endif
 };
 
diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index cefae8cb8..46f41dcca 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -1163,47 +1163,56 @@ TLSData<CoreTLSData>& getCoreTlsData()
 
 
 #ifdef CV_COLLECT_IMPL_DATA
+ImplCollector& getImplData()
+{
+    static ImplCollector& collector = *new ImplCollector(); // created on first call, not deleted here
+    return collector;
+}
+
 void setImpl(int flags)
 {
-    CoreTLSData* data = getCoreTlsData().get();
-    data->implFlags = flags;
-    data->implCode.clear();
-    data->implFun.clear();
+    ImplCollector& collector = getImplData();
+    cv::AutoLock lock(collector.mutex); // held until the function returns
+    collector.implFlags = flags;        // replace the flags ...
+    collector.implCode.clear();         // ... and drop every recorded entry
+    collector.implFun.clear();
 }
 
 void addImpl(int flag, const char* func)
 {
-    CoreTLSData* data = getCoreTlsData().get();
-    data->implFlags |= flag;
+    ImplCollector& collector = getImplData();
+    cv::AutoLock lock(collector.mutex);
+    collector.implFlags |= flag;
     if(func) // use lazy collection if name was not specified
     {
-        size_t index = data->implCode.size();
-        if(!index || (data->implCode[index-1] != flag || data->implFun[index-1].compare(func))) // avoid duplicates
+        const size_t count = collector.implCode.size();
+        if(count == 0 || collector.implCode[count-1] != flag || collector.implFun[count-1].compare(func) != 0) // skip a repeat of the last entry
         {
-            data->implCode.push_back(flag);
-            data->implFun.push_back(func);
+            collector.implCode.push_back(flag);
+            collector.implFun.push_back(func);
         }
     }
 }
 
 int getImpl(std::vector<int> &impl, std::vector<String> &funName)
 {
-    CoreTLSData* data = getCoreTlsData().get();
-    impl = data->implCode;
-    funName = data->implFun;
-    return data->implFlags; // return actual flags for lazy collection
+    ImplCollector& collector = getImplData();
+    cv::AutoLock lock(collector.mutex); // copy both vectors and the flags under one lock
+    impl = collector.implCode;
+    funName = collector.implFun;
+    return collector.implFlags; // return actual flags for lazy collection
 }
 
 bool useCollection()
 {
-    CoreTLSData* data = getCoreTlsData().get();
-    return data->useCollection;
+    cv::AutoLock lock(getImplData().mutex); // read under the same mutex setUseCollection() writes under
+    return getImplData().useCollection;
 }
 
 void setUseCollection(bool flag)
 {
-    CoreTLSData* data = getCoreTlsData().get();
-    data->useCollection = flag;
+    cv::AutoLock lock(getImplData().mutex);
+    getImplData().useCollection = flag;
 }
 #endif
 
diff --git a/modules/cudalegacy/src/bm_fast.cpp b/modules/cudalegacy/src/bm_fast.cpp
index c418e4bc9..ecb87908f 100644
--- a/modules/cudalegacy/src/bm_fast.cpp
+++ b/modules/cudalegacy/src/bm_fast.cpp
@@ -45,7 +45,7 @@
 using namespace cv;
 using namespace cv::cuda;
 
-#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+#if !defined HAVE_CUDA || !defined(HAVE_OPENCV_CUDAARITHM) || defined(CUDA_DISABLER)
 
 void cv::cuda::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
 
diff --git a/modules/cudalegacy/src/needle_map.cpp b/modules/cudalegacy/src/needle_map.cpp
index 51cb9dffc..185bfc1e8 100644
--- a/modules/cudalegacy/src/needle_map.cpp
+++ b/modules/cudalegacy/src/needle_map.cpp
@@ -45,7 +45,7 @@
 using namespace cv;
 using namespace cv::cuda;
 
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAIMGPROC) || defined (CUDA_DISABLER)
 
 void cv::cuda::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt
index 50e2d5da6..6d565217a 100644
--- a/modules/imgcodecs/CMakeLists.txt
+++ b/modules/imgcodecs/CMakeLists.txt
@@ -1,7 +1,3 @@
-if(WINRT)
-  ocv_module_disable(imgcodecs)
-endif()
-
 set(the_description "Image codecs")
 ocv_add_module(imgcodecs opencv_imgproc WRAP java python)
 
diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index b0c942172..30846efea 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -185,13 +185,14 @@ compression parameters :
 
     void createAlphaMat(Mat &mat)
     {
+        CV_Assert(mat.type() == CV_8UC4); // at<Vec4b> below needs 8-bit, 4-channel data
         for (int i = 0; i < mat.rows; ++i) {
             for (int j = 0; j < mat.cols; ++j) {
-                Vec4b& rgba = mat.at<Vec4b>(i, j);
-                rgba[0] = UCHAR_MAX;
-                rgba[1] = saturate_cast<uchar>((float (mat.cols - j)) / ((float)mat.cols) * UCHAR_MAX);
-                rgba[2] = saturate_cast<uchar>((float (mat.rows - i)) / ((float)mat.rows) * UCHAR_MAX);
-                rgba[3] = saturate_cast<uchar>(0.5 * (rgba[1] + rgba[2]));
+                Vec4b& bgra = mat.at<Vec4b>(i, j);
+                bgra[0] = UCHAR_MAX; // Blue
+                bgra[1] = saturate_cast<uchar>((float (mat.cols - j)) / ((float)mat.cols) * UCHAR_MAX); // Green
+                bgra[2] = saturate_cast<uchar>((float (mat.rows - i)) / ((float)mat.rows) * UCHAR_MAX); // Red
+                bgra[3] = saturate_cast<uchar>(0.5 * (bgra[1] + bgra[2])); // Alpha
             }
         }
     }
diff --git a/modules/imgcodecs/src/grfmt_jpeg2000.cpp b/modules/imgcodecs/src/grfmt_jpeg2000.cpp
index 83fd55a59..e499c58b8 100644
--- a/modules/imgcodecs/src/grfmt_jpeg2000.cpp
+++ b/modules/imgcodecs/src/grfmt_jpeg2000.cpp
@@ -45,6 +45,7 @@
 #ifdef HAVE_JASPER
 
 #include "grfmt_jpeg2000.hpp"
+#include "opencv2/imgproc.hpp"
 
 #ifdef WIN32
 #define JAS_WIN_MSVC_BUILD 1
@@ -159,6 +160,21 @@ bool  Jpeg2KDecoder::readData( Mat& img )
     jas_stream_t* stream = (jas_stream_t*)m_stream;
     jas_image_t* image = (jas_image_t*)m_image;
 
+#ifndef WIN32
+    // At least on some Linux instances the
+    // system libjasper segfaults when
+    // converting color to grey.
+    // We do this conversion manually at the end.
+    Mat clr;
+    if (CV_MAT_CN(img.type()) < CV_MAT_CN(this->type()))
+    {
+        clr.create(img.size().height, img.size().width, this->type());
+        color = true;
+        data = clr.ptr();
+        step = (int)clr.step;
+    }
+#endif
+
     if( stream && image )
     {
         bool convert;
@@ -171,7 +187,7 @@ bool  Jpeg2KDecoder::readData( Mat& img )
         else
         {
             convert = (jas_clrspc_fam( jas_image_clrspc( image ) ) != JAS_CLRSPC_FAM_GRAY);
-            colorspace = JAS_CLRSPC_SGRAY; // TODO GENGRAY or SGRAY?
+            colorspace = JAS_CLRSPC_SGRAY; // TODO GENGRAY or SGRAY? (GENGRAY fails on Win.)
         }
 
         // convert to the desired colorspace
@@ -256,6 +272,13 @@ bool  Jpeg2KDecoder::readData( Mat& img )
 
     close();
 
+#ifndef WIN32
+    if (!clr.empty())
+    {
+        cv::cvtColor(clr, img, COLOR_BGR2GRAY);
+    }
+#endif
+
     return result;
 }
 
diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp
index 8526a4a3f..383c25a2b 100644
--- a/modules/imgcodecs/src/loadsave.cpp
+++ b/modules/imgcodecs/src/loadsave.cpp
@@ -374,15 +374,8 @@ imreadmulti_(const String& filename, int flags, std::vector<Mat>& mats)
                 type = CV_MAKETYPE(CV_MAT_DEPTH(type), 1);
         }
 
-        // established the required input image size.
-        CvSize size;
-        size.width = decoder->width();
-        size.height = decoder->height();
-
-        Mat mat;
-        mat.create(size.height, size.width, type);
-
         // read the image data
+        Mat mat(decoder->height(), decoder->width(), type);
         if (!decoder->readData(mat))
         {
             break;
diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp
index 423d030a0..92238a95f 100644
--- a/modules/imgcodecs/test/test_grfmt.cpp
+++ b/modules/imgcodecs/test/test_grfmt.cpp
@@ -87,6 +87,9 @@ TEST(Imgcodecs_imread, regression)
 {
     const char* const filenames[] =
     {
+#ifdef HAVE_JASPER
+        "Rome.jp2",
+#endif
         "color_palette_alpha.png",
         "multipage.tif",
         "rle.hdr",
@@ -99,16 +102,32 @@ TEST(Imgcodecs_imread, regression)
 
     for (size_t i = 0; i < sizeof(filenames) / sizeof(filenames[0]); ++i)
     {
-        ASSERT_TRUE(imread_compare(folder + string(filenames[i]), IMREAD_UNCHANGED));
-        ASSERT_TRUE(imread_compare(folder + string(filenames[i]), IMREAD_GRAYSCALE));
-        ASSERT_TRUE(imread_compare(folder + string(filenames[i]), IMREAD_COLOR));
-        ASSERT_TRUE(imread_compare(folder + string(filenames[i]), IMREAD_ANYDEPTH));
-        ASSERT_TRUE(imread_compare(folder + string(filenames[i]), IMREAD_ANYCOLOR));
-        if (i != 2) // GDAL does not support hdr
-            ASSERT_TRUE(imread_compare(folder + string(filenames[i]), IMREAD_LOAD_GDAL));
+        const string path = folder + string(filenames[i]);
+        ASSERT_TRUE(imread_compare(path, IMREAD_UNCHANGED));
+        ASSERT_TRUE(imread_compare(path, IMREAD_GRAYSCALE));
+        ASSERT_TRUE(imread_compare(path, IMREAD_COLOR));
+        ASSERT_TRUE(imread_compare(path, IMREAD_ANYDEPTH));
+        ASSERT_TRUE(imread_compare(path, IMREAD_ANYCOLOR));
+        if (path.substr(path.length() - 3) != "hdr")
+        {
+            // GDAL does not support hdr
+            ASSERT_TRUE(imread_compare(path, IMREAD_LOAD_GDAL));
+        }
     }
 }
 
+#ifdef HAVE_JASPER
+TEST(Imgcodecs_jasper, regression) // each .jp2 sample is checked with IMREAD_COLOR and IMREAD_GRAYSCALE
+{
+    const string dataDir = string(cvtest::TS::ptr()->get_data_path()) + "/readwrite/";
+    const string bretagne = dataDir + "Bretagne2.jp2", grey = dataDir + "Grey.jp2";
+    ASSERT_TRUE(imread_compare(bretagne, IMREAD_COLOR));
+    ASSERT_TRUE(imread_compare(bretagne, IMREAD_GRAYSCALE));
+    ASSERT_TRUE(imread_compare(grey, IMREAD_COLOR));
+    ASSERT_TRUE(imread_compare(grey, IMREAD_GRAYSCALE));
+}
+#endif
+
 class CV_GrfmtWriteBigImageTest : public cvtest::BaseTest
 {
 public:
diff --git a/modules/imgproc/src/colormap.cpp b/modules/imgproc/src/colormap.cpp
index 08ff44a5c..86d8679e6 100644
--- a/modules/imgproc/src/colormap.cpp
+++ b/modules/imgproc/src/colormap.cpp
@@ -83,7 +83,6 @@ Mat interp1_(const Mat& X_, const Mat& Y_, const Mat& XI)
     // interpolated values
     Mat yi = Mat::zeros(XI.size(), XI.type());
     for(int i = 0; i < n; i++) {
-        int c = 0;
         int low = 0;
         int high = X.rows - 1;
         // set bounds
@@ -93,7 +92,7 @@ Mat interp1_(const Mat& X_, const Mat& Y_, const Mat& XI)
             low = high - 1;
         // binary search
         while((high-low)>1) {
-            c = low + ((high - low) >> 1);
+            const int c = low + ((high - low) >> 1);
             if(XI.at<_Tp>(i,0) > X.at<_Tp>(c,0)) {
                 low = c;
             } else {
diff --git a/modules/imgproc/src/emd.cpp b/modules/imgproc/src/emd.cpp
index 69006f3c4..22468da6d 100644
--- a/modules/imgproc/src/emd.cpp
+++ b/modules/imgproc/src/emd.cpp
@@ -1152,6 +1152,7 @@ float cv::EMD( InputArray _signature1, InputArray _signature2,
     {
         _flow.create(signature1.rows, signature2.rows, CV_32F);
         flow = _flow.getMat();
+        flow = Scalar::all(0);
         _cflow = flow;
     }
 
diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp
index 5172024b7..e11df27f0 100644
--- a/modules/imgproc/src/hough.cpp
+++ b/modules/imgproc/src/hough.cpp
@@ -90,11 +90,8 @@ HoughLinesStandard( const Mat& img, float rho, float theta,
     int width = img.cols;
     int height = img.rows;
 
-    if (max_theta < 0 || max_theta > CV_PI ) {
-        CV_Error( CV_StsBadArg, "max_theta must fall between 0 and pi" );
-    }
-    if (min_theta < 0 || min_theta > max_theta ) {
-        CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
+    if (max_theta < min_theta ) {
+        CV_Error( CV_StsBadArg, "max_theta must be greater than min_theta" );
     }
     int numangle = cvRound((max_theta - min_theta) / theta);
     int numrho = cvRound(((width + height) * 2 + 1) / rho);
@@ -178,7 +175,7 @@ HoughLinesStandard( const Mat& img, float rho, float theta,
         int n = cvFloor(idx*scale) - 1;
         int r = idx - (n+1)*(numrho+2) - 1;
         line.rho = (r - (numrho - 1)*0.5f) * rho;
-        line.angle = n * theta;
+        line.angle = static_cast<float>(min_theta) + n * theta;
         lines.push_back(Vec2f(line.rho, line.angle));
     }
 }
diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt
index e38ec1f19..38f410e64 100644
--- a/modules/java/CMakeLists.txt
+++ b/modules/java/CMakeLists.txt
@@ -318,12 +318,7 @@ else(ANDROID)
                      COMMENT "Generating ${JAR_NAME}"
                     )
 
-  if(WIN32)
-    set(JAR_INSTALL_DIR java)
-  else(WIN32)
-    set(JAR_INSTALL_DIR share/OpenCV/java)
-  endif(WIN32)
-  install(FILES ${JAR_FILE} OPTIONAL DESTINATION ${JAR_INSTALL_DIR} COMPONENT java)
+  install(FILES ${JAR_FILE} OPTIONAL DESTINATION ${OPENCV_JAR_INSTALL_PATH} COMPONENT java)
 endif(ANDROID)
 
 # step 5: build native part
@@ -398,12 +393,12 @@ if(ANDROID)
 else()
   if(NOT INSTALL_CREATE_DISTRIB)
     ocv_install_target(${the_module} OPTIONAL EXPORT OpenCVModules
-            RUNTIME DESTINATION ${JAR_INSTALL_DIR} COMPONENT java
-            LIBRARY DESTINATION ${JAR_INSTALL_DIR} COMPONENT java)
+            RUNTIME DESTINATION ${OPENCV_JAR_INSTALL_PATH} COMPONENT java
+            LIBRARY DESTINATION ${OPENCV_JAR_INSTALL_PATH} COMPONENT java)
   else()
     ocv_install_target(${the_module} OPTIONAL EXPORT OpenCVModules
-            RUNTIME DESTINATION ${JAR_INSTALL_DIR}/${OpenCV_ARCH} COMPONENT java
-            LIBRARY DESTINATION ${JAR_INSTALL_DIR}/${OpenCV_ARCH} COMPONENT java)
+            RUNTIME DESTINATION ${OPENCV_JAR_INSTALL_PATH}/${OpenCV_ARCH} COMPONENT java
+            LIBRARY DESTINATION ${OPENCV_JAR_INSTALL_PATH}/${OpenCV_ARCH} COMPONENT java)
   endif()
 endif()
 
diff --git a/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java b/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java
index 9425e2644..22f122928 100644
--- a/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java
+++ b/modules/java/android_test/src/org/opencv/test/OpenCVTestRunner.java
@@ -82,7 +82,7 @@ public class OpenCVTestRunner extends InstrumentationTestRunner {
             // Using OpenCV Manager for initialization;
 
             Log("Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, getContext(), mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, getContext(), mLoaderCallback);
 
             synchronized (this) {
                 try {
diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py
index ccdb03fda..b9fce4294 100755
--- a/modules/java/generator/gen_java.py
+++ b/modules/java/generator/gen_java.py
@@ -75,7 +75,7 @@ const_ignore_list = (
     "CV_CAP_PROP_CONVERT_RGB",
     "CV_CAP_PROP_WHITE_BALANCE_BLUE_U",
     "CV_CAP_PROP_RECTIFICATION",
-    "CV_CAP_PROP_MONOCROME",
+    "CV_CAP_PROP_MONOCHROME",
     "CV_CAP_PROP_SHARPNESS",
     "CV_CAP_PROP_AUTO_EXPOSURE",
     "CV_CAP_PROP_GAMMA",
diff --git a/modules/java/generator/src/java/android+AsyncServiceHelper.java b/modules/java/generator/src/java/android+AsyncServiceHelper.java
index 568f3da17..e18d5a500 100644
--- a/modules/java/generator/src/java/android+AsyncServiceHelper.java
+++ b/modules/java/generator/src/java/android+AsyncServiceHelper.java
@@ -21,8 +21,9 @@ class AsyncServiceHelper
             final LoaderCallbackInterface Callback)
     {
         AsyncServiceHelper helper = new AsyncServiceHelper(Version, AppContext, Callback);
-        if (AppContext.bindService(new Intent("org.opencv.engine.BIND"),
-                helper.mServiceConnection, Context.BIND_AUTO_CREATE))
+        Intent intent = new Intent("org.opencv.engine.BIND");
+        intent.setPackage("org.opencv.engine");
+        if (AppContext.bindService(intent, helper.mServiceConnection, Context.BIND_AUTO_CREATE))
         {
             return true;
         }
diff --git a/modules/java/generator/src/java/android+OpenCVLoader.java b/modules/java/generator/src/java/android+OpenCVLoader.java
index 13cd42849..339ea2d9d 100644
--- a/modules/java/generator/src/java/android+OpenCVLoader.java
+++ b/modules/java/generator/src/java/android+OpenCVLoader.java
@@ -47,6 +47,22 @@ public class OpenCVLoader
      */
     public static final String OPENCV_VERSION_2_4_9 = "2.4.9";
 
+    /**
+     * OpenCV Library version 2.4.10.
+     */
+    public static final String OPENCV_VERSION_2_4_10 = "2.4.10";
+
+    /**
+     * OpenCV Library version 2.4.11.
+     */
+    public static final String OPENCV_VERSION_2_4_11 = "2.4.11";
+
+    /**
+     * OpenCV Library version 3.0.0.
+     */
+    public static final String OPENCV_VERSION_3_0_0 = "3.0.0";
+
+
     /**
      * Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java").
      * @return Returns true is initialization of OpenCV was successful.
diff --git a/modules/ml/CMakeLists.txt b/modules/ml/CMakeLists.txt
index 27b499003..1b64cc4f1 100644
--- a/modules/ml/CMakeLists.txt
+++ b/modules/ml/CMakeLists.txt
@@ -1,2 +1,2 @@
 set(the_description "Machine Learning")
-ocv_define_module(ml opencv_core)
+ocv_define_module(ml opencv_core WRAP java python)
diff --git a/modules/ml/include/opencv2/ml.hpp b/modules/ml/include/opencv2/ml.hpp
index 8959c58d8..a611583ae 100644
--- a/modules/ml/include/opencv2/ml.hpp
+++ b/modules/ml/include/opencv2/ml.hpp
@@ -104,7 +104,7 @@ enum SampleTypes
 It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
 being computed by cross-validation.
  */
-class CV_EXPORTS_W_MAP ParamGrid
+class CV_EXPORTS ParamGrid
 {
 public:
     /** @brief Default constructor */
@@ -112,8 +112,8 @@ public:
     /** @brief Constructor with parameters */
     ParamGrid(double _minVal, double _maxVal, double _logStep);
 
-    CV_PROP_RW double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
-    CV_PROP_RW double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
+    double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
+    double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
     /** @brief Logarithmic step for iterating the statmodel parameter.
 
     The grid determines the following iteration sequence of the statmodel parameter values:
@@ -122,7 +122,7 @@ public:
     \f[\texttt{minVal} * \texttt{logStep} ^n <  \texttt{maxVal}\f]
     The grid is logarithmic, so logStep must always be greater then 1. Default value is 1.
     */
-    CV_PROP_RW double logStep;
+    double logStep;
 };
 
 /** @brief Class encapsulating training data.
@@ -134,22 +134,22 @@ of this class into StatModel::train.
 
 @sa @ref ml_intro_data
  */
-class CV_EXPORTS TrainData
+class CV_EXPORTS_W TrainData
 {
 public:
     static inline float missingValue() { return FLT_MAX; }
     virtual ~TrainData();
 
-    virtual int getLayout() const = 0;
-    virtual int getNTrainSamples() const = 0;
-    virtual int getNTestSamples() const = 0;
-    virtual int getNSamples() const = 0;
-    virtual int getNVars() const = 0;
-    virtual int getNAllVars() const = 0;
+    CV_WRAP virtual int getLayout() const = 0;
+    CV_WRAP virtual int getNTrainSamples() const = 0;
+    CV_WRAP virtual int getNTestSamples() const = 0;
+    CV_WRAP virtual int getNSamples() const = 0;
+    CV_WRAP virtual int getNVars() const = 0;
+    CV_WRAP virtual int getNAllVars() const = 0;
 
-    virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
-    virtual Mat getSamples() const = 0;
-    virtual Mat getMissing() const = 0;
+    CV_WRAP virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
+    CV_WRAP virtual Mat getSamples() const = 0;
+    CV_WRAP virtual Mat getMissing() const = 0;
 
     /** @brief Returns matrix of train samples
 
@@ -163,7 +163,7 @@ public:
     In current implementation the function tries to avoid physical data copying and returns the
     matrix stored inside TrainData (unless the transposition or compression is needed).
      */
-    virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
+    CV_WRAP virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
                                 bool compressSamples=true,
                                 bool compressVars=true) const = 0;
 
@@ -172,7 +172,7 @@ public:
     The function returns ordered or the original categorical responses. Usually it's used in
     regression algorithms.
      */
-    virtual Mat getTrainResponses() const = 0;
+    CV_WRAP virtual Mat getTrainResponses() const = 0;
 
     /** @brief Returns the vector of normalized categorical responses
 
@@ -180,38 +180,38 @@ public:
     classes>-1`. The actual label value can be retrieved then from the class label vector, see
     TrainData::getClassLabels.
      */
-    virtual Mat getTrainNormCatResponses() const = 0;
-    virtual Mat getTestResponses() const = 0;
-    virtual Mat getTestNormCatResponses() const = 0;
-    virtual Mat getResponses() const = 0;
-    virtual Mat getNormCatResponses() const = 0;
-    virtual Mat getSampleWeights() const = 0;
-    virtual Mat getTrainSampleWeights() const = 0;
-    virtual Mat getTestSampleWeights() const = 0;
-    virtual Mat getVarIdx() const = 0;
-    virtual Mat getVarType() const = 0;
-    virtual int getResponseType() const = 0;
-    virtual Mat getTrainSampleIdx() const = 0;
-    virtual Mat getTestSampleIdx() const = 0;
-    virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
+    CV_WRAP virtual Mat getTrainNormCatResponses() const = 0;
+    CV_WRAP virtual Mat getTestResponses() const = 0;
+    CV_WRAP virtual Mat getTestNormCatResponses() const = 0;
+    CV_WRAP virtual Mat getResponses() const = 0;
+    CV_WRAP virtual Mat getNormCatResponses() const = 0;
+    CV_WRAP virtual Mat getSampleWeights() const = 0;
+    CV_WRAP virtual Mat getTrainSampleWeights() const = 0;
+    CV_WRAP virtual Mat getTestSampleWeights() const = 0;
+    CV_WRAP virtual Mat getVarIdx() const = 0;
+    CV_WRAP virtual Mat getVarType() const = 0;
+    CV_WRAP virtual int getResponseType() const = 0;
+    CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
+    CV_WRAP virtual Mat getTestSampleIdx() const = 0;
+    CV_WRAP virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
     virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
-    virtual Mat getDefaultSubstValues() const = 0;
+    CV_WRAP virtual Mat getDefaultSubstValues() const = 0;
 
-    virtual int getCatCount(int vi) const = 0;
+    CV_WRAP virtual int getCatCount(int vi) const = 0;
 
     /** @brief Returns the vector of class labels
 
     The function returns vector of unique labels occurred in the responses.
      */
-    virtual Mat getClassLabels() const = 0;
+    CV_WRAP virtual Mat getClassLabels() const = 0;
 
-    virtual Mat getCatOfs() const = 0;
-    virtual Mat getCatMap() const = 0;
+    CV_WRAP virtual Mat getCatOfs() const = 0;
+    CV_WRAP virtual Mat getCatMap() const = 0;
 
     /** @brief Splits the training data into the training and test parts
     @sa TrainData::setTrainTestSplitRatio
      */
-    virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;
+    CV_WRAP virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;
 
     /** @brief Splits the training data into the training and test parts
 
@@ -221,10 +221,10 @@ public:
     subset can be retrieved and processed as well.
     @sa TrainData::setTrainTestSplit
      */
-    virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
-    virtual void shuffleTrainTest() = 0;
+    CV_WRAP virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
+    CV_WRAP virtual void shuffleTrainTest() = 0;
 
-    static Mat getSubVector(const Mat& vec, const Mat& idx);
+    CV_WRAP static Mat getSubVector(const Mat& vec, const Mat& idx);
 
     /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.
 
@@ -280,7 +280,7 @@ public:
         <number_of_variables_in_responses>`, containing types of each input and output variable. See
         ml::VariableTypes.
      */
-    static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
+    CV_WRAP static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
                                  InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
                                  InputArray sampleWeights=noArray(), InputArray varType=noArray());
 };
@@ -297,15 +297,15 @@ public:
         COMPRESSED_INPUT=2,
         PREPROCESSED_INPUT=4
     };
-    virtual void clear();
+    CV_WRAP virtual void clear();
 
     /** @brief Returns the number of variables in training samples */
-    virtual int getVarCount() const = 0;
+    CV_WRAP virtual int getVarCount() const = 0;
 
     /** @brief Returns true if the model is trained */
-    virtual bool isTrained() const = 0;
+    CV_WRAP virtual bool isTrained() const = 0;
     /** @brief Returns true if the model is classifier */
-    virtual bool isClassifier() const = 0;
+    CV_WRAP virtual bool isClassifier() const = 0;
 
     /** @brief Trains the statistical model
 
@@ -314,7 +314,7 @@ public:
     @param flags optional flags, depending on the model. Some of the models can be updated with the
         new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP).
      */
-    virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );
+    CV_WRAP virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );
 
     /** @brief Trains the statistical model
 
@@ -322,7 +322,7 @@ public:
     @param layout See ml::SampleTypes.
     @param responses vector of responses associated with the training samples.
     */
-    virtual bool train( InputArray samples, int layout, InputArray responses );
+    CV_WRAP virtual bool train( InputArray samples, int layout, InputArray responses );
 
     /** @brief Computes error on the training or test dataset
 
@@ -337,7 +337,7 @@ public:
     The method uses StatModel::predict to compute the error. For regression models the error is
     computed as RMS, for classifiers - as a percent of missclassified samples (0%-100%).
      */
-    virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;
+    CV_WRAP virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;
 
     /** @brief Predicts response(s) for the provided sample(s)
 
@@ -345,7 +345,7 @@ public:
     @param results The optional output matrix of results.
     @param flags The optional flags, model-dependent. See cv::ml::StatModel::Flags.
      */
-    virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
+    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
 
     /** @brief Loads model from the file
 
@@ -393,11 +393,11 @@ public:
 
     /** Saves the model to a file.
     In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
-    virtual void save(const String& filename) const;
+    CV_WRAP virtual void save(const String& filename) const;
 
     /** Returns model string identifier.
     This string is used as top level xml/yml node tag when model is saved to a file or string. */
-    virtual String getDefaultModelName() const = 0;
+    CV_WRAP virtual String getDefaultModelName() const = 0;
 };
 
 /****************************************************************************************\
@@ -419,12 +419,12 @@ public:
     The vector outputProbs contains the output probabilities corresponding to each element of
     result.
      */
-    virtual float predictProb( InputArray inputs, OutputArray outputs,
+    CV_WRAP virtual float predictProb( InputArray inputs, OutputArray outputs,
                                OutputArray outputProbs, int flags=0 ) const = 0;
 
     /** Creates empty model
     Use StatModel::train to train the model after creation. */
-    static Ptr<NormalBayesClassifier> create();
+    CV_WRAP static Ptr<NormalBayesClassifier> create();
 };
 
 /****************************************************************************************\
@@ -440,16 +440,28 @@ class CV_EXPORTS_W KNearest : public StatModel
 public:
 
     /** Default number of neighbors to use in predict method. */
-    CV_PURE_PROPERTY(int, DefaultK)
+    /** @see setDefaultK */
+    CV_WRAP virtual int getDefaultK() const = 0;
+    /** @copybrief getDefaultK @see getDefaultK */
+    CV_WRAP virtual void setDefaultK(int val) = 0;
 
     /** Whether classification or regression model should be trained. */
-    CV_PURE_PROPERTY(bool, IsClassifier)
+    /** @see setIsClassifier */
+    CV_WRAP virtual bool getIsClassifier() const = 0;
+    /** @copybrief getIsClassifier @see getIsClassifier */
+    CV_WRAP virtual void setIsClassifier(bool val) = 0;
 
     /** Parameter for KDTree implementation. */
-    CV_PURE_PROPERTY(int, Emax)
+    /** @see setEmax */
+    CV_WRAP virtual int getEmax() const = 0;
+    /** @copybrief getEmax @see getEmax */
+    CV_WRAP virtual void setEmax(int val) = 0;
 
     /** %Algorithm type, one of KNearest::Types. */
-    CV_PURE_PROPERTY(int, AlgorithmType)
+    /** @see setAlgorithmType */
+    CV_WRAP virtual int getAlgorithmType() const = 0;
+    /** @copybrief getAlgorithmType @see getAlgorithmType */
+    CV_WRAP virtual void setAlgorithmType(int val) = 0;
 
     /** @brief Finds the neighbors and predicts responses for input vectors.
 
@@ -477,7 +489,7 @@ public:
 
     The function is parallelized with the TBB library.
      */
-    virtual float findNearest( InputArray samples, int k,
+    CV_WRAP virtual float findNearest( InputArray samples, int k,
                                OutputArray results,
                                OutputArray neighborResponses=noArray(),
                                OutputArray dist=noArray() ) const = 0;
@@ -494,7 +506,7 @@ public:
 
     The static method creates empty %KNearest classifier. It should be then trained using StatModel::train method.
      */
-    static Ptr<KNearest> create();
+    CV_WRAP static Ptr<KNearest> create();
 };
 
 /****************************************************************************************\
@@ -518,52 +530,79 @@ public:
 
     /** Type of a %SVM formulation.
     See SVM::Types. Default value is SVM::C_SVC. */
-    CV_PURE_PROPERTY(int, Type)
+    /** @see setType */
+    CV_WRAP virtual int getType() const = 0;
+    /** @copybrief getType @see getType */
+    CV_WRAP virtual void setType(int val) = 0;
 
     /** Parameter \f$\gamma\f$ of a kernel function.
     For SVM::POLY, SVM::RBF, SVM::SIGMOID or SVM::CHI2. Default value is 1. */
-    CV_PURE_PROPERTY(double, Gamma)
+    /** @see setGamma */
+    CV_WRAP virtual double getGamma() const = 0;
+    /** @copybrief getGamma @see getGamma */
+    CV_WRAP virtual void setGamma(double val) = 0;
 
     /** Parameter _coef0_ of a kernel function.
     For SVM::POLY or SVM::SIGMOID. Default value is 0.*/
-    CV_PURE_PROPERTY(double, Coef0)
+    /** @see setCoef0 */
+    CV_WRAP virtual double getCoef0() const = 0;
+    /** @copybrief getCoef0 @see getCoef0 */
+    CV_WRAP virtual void setCoef0(double val) = 0;
 
     /** Parameter _degree_ of a kernel function.
     For SVM::POLY. Default value is 0. */
-    CV_PURE_PROPERTY(double, Degree)
+    /** @see setDegree */
+    CV_WRAP virtual double getDegree() const = 0;
+    /** @copybrief getDegree @see getDegree */
+    CV_WRAP virtual void setDegree(double val) = 0;
 
     /** Parameter _C_ of a %SVM optimization problem.
     For SVM::C_SVC, SVM::EPS_SVR or SVM::NU_SVR. Default value is 0. */
-    CV_PURE_PROPERTY(double, C)
+    /** @see setC */
+    CV_WRAP virtual double getC() const = 0;
+    /** @copybrief getC @see getC */
+    CV_WRAP virtual void setC(double val) = 0;
 
     /** Parameter \f$\nu\f$ of a %SVM optimization problem.
     For SVM::NU_SVC, SVM::ONE_CLASS or SVM::NU_SVR. Default value is 0. */
-    CV_PURE_PROPERTY(double, Nu)
+    /** @see setNu */
+    CV_WRAP virtual double getNu() const = 0;
+    /** @copybrief getNu @see getNu */
+    CV_WRAP virtual void setNu(double val) = 0;
 
     /** Parameter \f$\epsilon\f$ of a %SVM optimization problem.
     For SVM::EPS_SVR. Default value is 0. */
-    CV_PURE_PROPERTY(double, P)
+    /** @see setP */
+    CV_WRAP virtual double getP() const = 0;
+    /** @copybrief getP @see getP */
+    CV_WRAP virtual void setP(double val) = 0;
 
     /** Optional weights in the SVM::C_SVC problem, assigned to particular classes.
     They are multiplied by _C_ so the parameter _C_ of class _i_ becomes `classWeights(i) * C`. Thus
     these weights affect the misclassification penalty for different classes. The larger weight,
     the larger penalty on misclassification of data from the corresponding class. Default value is
     empty Mat. */
-    CV_PURE_PROPERTY_S(cv::Mat, ClassWeights)
+    /** @see setClassWeights */
+    CV_WRAP virtual cv::Mat getClassWeights() const = 0;
+    /** @copybrief getClassWeights @see getClassWeights */
+    CV_WRAP virtual void setClassWeights(const cv::Mat &val) = 0;
 
     /** Termination criteria of the iterative %SVM training procedure which solves a partial
     case of constrained quadratic optimization problem.
     You can specify tolerance and/or the maximum number of iterations. Default value is
     `TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, FLT_EPSILON )`; */
-    CV_PURE_PROPERTY_S(cv::TermCriteria, TermCriteria)
+    /** @see setTermCriteria */
+    CV_WRAP virtual cv::TermCriteria getTermCriteria() const = 0;
+    /** @copybrief getTermCriteria @see getTermCriteria */
+    CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;
 
     /** Type of a %SVM kernel.
     See SVM::KernelTypes. Default value is SVM::RBF. */
-    virtual int getKernelType() const = 0;
+    CV_WRAP virtual int getKernelType() const = 0;
 
     /** Initialize with one of predefined kernels.
     See SVM::KernelTypes. */
-    virtual void setKernel(int kernelType) = 0;
+    CV_WRAP virtual void setKernel(int kernelType) = 0;
 
     /** Initialize with custom kernel.
     See SVM::Kernel class for implementation details */
@@ -695,7 +734,7 @@ public:
     The method returns rho parameter of the decision function, a scalar subtracted from the weighted
     sum of kernel responses.
      */
-    virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;
+    CV_WRAP virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;
 
     /** @brief Generates a grid for %SVM parameters.
 
@@ -710,7 +749,7 @@ public:
     /** Creates empty model.
     Use StatModel::train to train the model. Since %SVM has several parameters, you may want to
     find the best parameters for your problem, it can be done with SVM::trainAuto. */
-    static Ptr<SVM> create();
+    CV_WRAP static Ptr<SVM> create();
 };
 
 /****************************************************************************************\
@@ -755,29 +794,38 @@ public:
     Default value of the parameter is EM::DEFAULT_NCLUSTERS=5. Some of %EM implementation could
     determine the optimal number of mixtures within a specified value range, but that is not the
     case in ML yet. */
-    CV_PURE_PROPERTY(int, ClustersNumber)
+    /** @see setClustersNumber */
+    CV_WRAP virtual int getClustersNumber() const = 0;
+    /** @copybrief getClustersNumber @see getClustersNumber */
+    CV_WRAP virtual void setClustersNumber(int val) = 0;
 
     /** Constraint on covariance matrices which defines type of matrices.
     See EM::Types. */
-    CV_PURE_PROPERTY(int, CovarianceMatrixType)
+    /** @see setCovarianceMatrixType */
+    CV_WRAP virtual int getCovarianceMatrixType() const = 0;
+    /** @copybrief getCovarianceMatrixType @see getCovarianceMatrixType */
+    CV_WRAP virtual void setCovarianceMatrixType(int val) = 0;
 
     /** The termination criteria of the %EM algorithm.
     The %EM algorithm can be terminated by the number of iterations termCrit.maxCount (number of
     M-steps) or when relative change of likelihood logarithm is less than termCrit.epsilon. Default
     maximum number of iterations is EM::DEFAULT_MAX_ITERS=100. */
-    CV_PURE_PROPERTY_S(TermCriteria, TermCriteria)
+    /** @see setTermCriteria */
+    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
+    /** @copybrief getTermCriteria @see getTermCriteria */
+    CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
 
     /** @brief Returns weights of the mixtures
 
     Returns vector with the number of elements equal to the number of mixtures.
      */
-    virtual Mat getWeights() const = 0;
+    CV_WRAP virtual Mat getWeights() const = 0;
     /** @brief Returns the cluster centers (means of the Gaussian mixture)
 
     Returns matrix with the number of rows equal to the number of mixtures and number of columns
     equal to the space dimensionality.
      */
-    virtual Mat getMeans() const = 0;
+    CV_WRAP virtual Mat getMeans() const = 0;
     /** @brief Returns covariation matrices
 
     Returns vector of covariation matrices. Number of matrices is the number of gaussian mixtures,
@@ -797,7 +845,7 @@ public:
     the sample. First element is an index of the most probable mixture component for the given
     sample.
      */
-    CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;
+    CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;
 
     /** @brief Estimate the Gaussian mixture parameters from a samples set.
 
@@ -827,7 +875,7 @@ public:
         mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and
         CV_64FC1 type.
      */
-    virtual bool trainEM(InputArray samples,
+    CV_WRAP virtual bool trainEM(InputArray samples,
                          OutputArray logLikelihoods=noArray(),
                          OutputArray labels=noArray(),
                          OutputArray probs=noArray()) = 0;
@@ -859,7 +907,7 @@ public:
         mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and
         CV_64FC1 type.
     */
-    virtual bool trainE(InputArray samples, InputArray means0,
+    CV_WRAP virtual bool trainE(InputArray samples, InputArray means0,
                         InputArray covs0=noArray(),
                         InputArray weights0=noArray(),
                         OutputArray logLikelihoods=noArray(),
@@ -884,7 +932,7 @@ public:
         mixture component given the each sample. It has \f$nsamples \times nclusters\f$ size and
         CV_64FC1 type.
     */
-    virtual bool trainM(InputArray samples, InputArray probs0,
+    CV_WRAP virtual bool trainM(InputArray samples, InputArray probs0,
                         OutputArray logLikelihoods=noArray(),
                         OutputArray labels=noArray(),
                         OutputArray probs=noArray()) = 0;
@@ -893,7 +941,7 @@ public:
     The model should be trained then using StatModel::train(traindata, flags) method. Alternatively, you
     can use one of the EM::train\* methods or load it from file using StatModel::load\<EM\>(filename).
      */
-    static Ptr<EM> create();
+    CV_WRAP static Ptr<EM> create();
 };
 
 /****************************************************************************************\
@@ -926,46 +974,70 @@ public:
     values. In case of regression and 2-class classification the optimal split can be found
     efficiently without employing clustering, thus the parameter is not used in these cases.
     Default value is 10.*/
-    CV_PURE_PROPERTY(int, MaxCategories)
+    /** @see setMaxCategories */
+    CV_WRAP virtual int getMaxCategories() const = 0;
+    /** @copybrief getMaxCategories @see getMaxCategories */
+    CV_WRAP virtual void setMaxCategories(int val) = 0;
 
     /** The maximum possible depth of the tree.
     That is the training algorithms attempts to split a node while its depth is less than maxDepth.
     The root node has zero depth. The actual depth may be smaller if the other termination criteria
     are met (see the outline of the training procedure @ref ml_intro_trees "here"), and/or if the
     tree is pruned. Default value is INT_MAX.*/
-    CV_PURE_PROPERTY(int, MaxDepth)
+    /** @see setMaxDepth */
+    CV_WRAP virtual int getMaxDepth() const = 0;
+    /** @copybrief getMaxDepth @see getMaxDepth */
+    CV_WRAP virtual void setMaxDepth(int val) = 0;
 
     /** If the number of samples in a node is less than this parameter then the node will not be split.
 
     Default value is 10.*/
-    CV_PURE_PROPERTY(int, MinSampleCount)
+    /** @see setMinSampleCount */
+    CV_WRAP virtual int getMinSampleCount() const = 0;
+    /** @copybrief getMinSampleCount @see getMinSampleCount */
+    CV_WRAP virtual void setMinSampleCount(int val) = 0;
 
     /** If CVFolds \> 1 then algorithms prunes the built decision tree using K-fold
     cross-validation procedure where K is equal to CVFolds.
     Default value is 10.*/
-    CV_PURE_PROPERTY(int, CVFolds)
+    /** @see setCVFolds */
+    CV_WRAP virtual int getCVFolds() const = 0;
+    /** @copybrief getCVFolds @see getCVFolds */
+    CV_WRAP virtual void setCVFolds(int val) = 0;
 
     /** If true then surrogate splits will be built.
     These splits allow to work with missing data and compute variable importance correctly.
     Default value is false.
     @note currently it's not implemented.*/
-    CV_PURE_PROPERTY(bool, UseSurrogates)
+    /** @see setUseSurrogates */
+    CV_WRAP virtual bool getUseSurrogates() const = 0;
+    /** @copybrief getUseSurrogates @see getUseSurrogates */
+    CV_WRAP virtual void setUseSurrogates(bool val) = 0;
 
     /** If true then a pruning will be harsher.
     This will make a tree more compact and more resistant to the training data noise but a bit less
     accurate. Default value is true.*/
-    CV_PURE_PROPERTY(bool, Use1SERule)
+    /** @see setUse1SERule */
+    CV_WRAP virtual bool getUse1SERule() const = 0;
+    /** @copybrief getUse1SERule @see getUse1SERule */
+    CV_WRAP virtual void setUse1SERule(bool val) = 0;
 
     /** If true then pruned branches are physically removed from the tree.
     Otherwise they are retained and it is possible to get results from the original unpruned (or
     pruned less aggressively) tree. Default value is true.*/
-    CV_PURE_PROPERTY(bool, TruncatePrunedTree)
+    /** @see setTruncatePrunedTree */
+    CV_WRAP virtual bool getTruncatePrunedTree() const = 0;
+    /** @copybrief getTruncatePrunedTree @see getTruncatePrunedTree */
+    CV_WRAP virtual void setTruncatePrunedTree(bool val) = 0;
 
     /** Termination criteria for regression trees.
     If all absolute differences between an estimated value in a node and values of train samples
     in this node are less than this parameter then the node will not be split further. Default
     value is 0.01f*/
-    CV_PURE_PROPERTY(float, RegressionAccuracy)
+    /** @see setRegressionAccuracy */
+    CV_WRAP virtual float getRegressionAccuracy() const = 0;
+    /** @copybrief getRegressionAccuracy @see getRegressionAccuracy */
+    CV_WRAP virtual void setRegressionAccuracy(float val) = 0;
 
     /** @brief The array of a priori class probabilities, sorted by the class label value.
 
@@ -982,7 +1054,10 @@ public:
     category is 1 and the weight of the second category is 10, then each mistake in predicting
     the second category is equivalent to making 10 mistakes in predicting the first category.
     Default value is empty Mat.*/
-    CV_PURE_PROPERTY_S(cv::Mat, Priors)
+    /** @see setPriors */
+    CV_WRAP virtual cv::Mat getPriors() const = 0;
+    /** @copybrief getPriors @see getPriors */
+    CV_WRAP virtual void setPriors(const cv::Mat &val) = 0;
 
     /** @brief The class represents a decision tree node.
      */
@@ -1054,7 +1129,7 @@ public:
     trained using train method (see StatModel::train). Alternatively, you can load the model from
     file using StatModel::load\<DTrees\>(filename).
      */
-    static Ptr<DTrees> create();
+    CV_WRAP static Ptr<DTrees> create();
 };
 
 /****************************************************************************************\
@@ -1071,13 +1146,19 @@ public:
 
     /** If true then variable importance will be calculated and then it can be retrieved by RTrees::getVarImportance.
     Default value is false.*/
-    CV_PURE_PROPERTY(bool, CalculateVarImportance)
+    /** @see setCalculateVarImportance */
+    CV_WRAP virtual bool getCalculateVarImportance() const = 0;
+    /** @copybrief getCalculateVarImportance @see getCalculateVarImportance */
+    CV_WRAP virtual void setCalculateVarImportance(bool val) = 0;
 
     /** The size of the randomly selected subset of features at each tree node and that are used
     to find the best split(s).
     If you set it to 0 then the size will be set to the square root of the total number of
     features. Default value is 0.*/
-    CV_PURE_PROPERTY(int, ActiveVarCount)
+    /** @see setActiveVarCount */
+    CV_WRAP virtual int getActiveVarCount() const = 0;
+    /** @copybrief getActiveVarCount @see getActiveVarCount */
+    CV_WRAP virtual void setActiveVarCount(int val) = 0;
 
     /** The termination criteria that specifies when the training algorithm stops.
     Either when the specified number of trees is trained and added to the ensemble or when
@@ -1086,20 +1167,23 @@ public:
     pass a certain number of trees. Also to keep in mind, the number of tree increases the
     prediction time linearly. Default value is TermCriteria(TermCriteria::MAX_ITERS +
     TermCriteria::EPS, 50, 0.1)*/
-    CV_PURE_PROPERTY_S(TermCriteria, TermCriteria)
+    /** @see setTermCriteria */
+    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
+    /** @copybrief getTermCriteria @see getTermCriteria */
+    CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
 
     /** Returns the variable importance array.
     The method returns the variable importance vector, computed at the training stage when
     CalculateVarImportance is set to true. If this flag was set to false, the empty matrix is
     returned.
      */
-    virtual Mat getVarImportance() const = 0;
+    CV_WRAP virtual Mat getVarImportance() const = 0;
 
     /** Creates the empty model.
     Use StatModel::train to train the model, StatModel::train to create and train the model,
     StatModel::load to load the pre-trained model.
      */
-    static Ptr<RTrees> create();
+    CV_WRAP static Ptr<RTrees> create();
 };
 
 /****************************************************************************************\
@@ -1115,16 +1199,25 @@ class CV_EXPORTS_W Boost : public DTrees
 public:
     /** Type of the boosting algorithm.
     See Boost::Types. Default value is Boost::REAL. */
-    CV_PURE_PROPERTY(int, BoostType)
+    /** @see setBoostType */
+    CV_WRAP virtual int getBoostType() const = 0;
+    /** @copybrief getBoostType @see getBoostType */
+    CV_WRAP virtual void setBoostType(int val) = 0;
 
     /** The number of weak classifiers.
     Default value is 100. */
-    CV_PURE_PROPERTY(int, WeakCount)
+    /** @see setWeakCount */
+    CV_WRAP virtual int getWeakCount() const = 0;
+    /** @copybrief getWeakCount @see getWeakCount */
+    CV_WRAP virtual void setWeakCount(int val) = 0;
 
     /** A threshold between 0 and 1 used to save computational time.
     Samples with summary weight \f$\leq 1 - weight_trim_rate\f$ do not participate in the *next*
     iteration of training. Set this parameter to 0 to turn off this functionality. Default value is 0.95.*/
-    CV_PURE_PROPERTY(double, WeightTrimRate)
+    /** @see setWeightTrimRate */
+    CV_WRAP virtual double getWeightTrimRate() const = 0;
+    /** @copybrief getWeightTrimRate @see getWeightTrimRate */
+    CV_WRAP virtual void setWeightTrimRate(double val) = 0;
 
     /** Boosting type.
     Gentle AdaBoost and Real AdaBoost are often the preferable choices. */
@@ -1139,7 +1232,7 @@ public:
 
     /** Creates the empty model.
     Use StatModel::train to train the model, StatModel::load\<Boost\>(filename) to load the pre-trained model. */
-    static Ptr<Boost> create();
+    CV_WRAP static Ptr<Boost> create();
 };
 
 /****************************************************************************************\
@@ -1189,7 +1282,7 @@ Additional flags for StatModel::train are available: ANN_MLP::TrainFlags.
 
 @sa @ref ml_intro_ann
  */
-class CV_EXPORTS_W ANN_MLP : public StatModel
+class CV_EXPORTS ANN_MLP : public StatModel
 {
 public:
     /** Available training methods */
@@ -1232,37 +1325,61 @@ public:
     You can specify the maximum number of iterations (maxCount) and/or how much the error could
     change between the iterations to make the algorithm continue (epsilon). Default value is
     TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01).*/
-    CV_PURE_PROPERTY(TermCriteria, TermCriteria)
+    /** @see setTermCriteria */
+    virtual TermCriteria getTermCriteria() const = 0;
+    /** @copybrief getTermCriteria @see getTermCriteria */
+    virtual void setTermCriteria(TermCriteria val) = 0;
 
     /** BPROP: Strength of the weight gradient term.
     The recommended value is about 0.1. Default value is 0.1.*/
-    CV_PURE_PROPERTY(double, BackpropWeightScale)
+    /** @see setBackpropWeightScale */
+    virtual double getBackpropWeightScale() const = 0;
+    /** @copybrief getBackpropWeightScale @see getBackpropWeightScale */
+    virtual void setBackpropWeightScale(double val) = 0;
 
     /** BPROP: Strength of the momentum term (the difference between weights on the 2 previous iterations).
     This parameter provides some inertia to smooth the random fluctuations of the weights. It can
     vary from 0 (the feature is disabled) to 1 and beyond. The value 0.1 or so is good enough.
     Default value is 0.1.*/
-    CV_PURE_PROPERTY(double, BackpropMomentumScale)
+    /** @see setBackpropMomentumScale */
+    virtual double getBackpropMomentumScale() const = 0;
+    /** @copybrief getBackpropMomentumScale @see getBackpropMomentumScale */
+    virtual void setBackpropMomentumScale(double val) = 0;
 
     /** RPROP: Initial value \f$\Delta_0\f$ of update-values \f$\Delta_{ij}\f$.
     Default value is 0.1.*/
-    CV_PURE_PROPERTY(double, RpropDW0)
+    /** @see setRpropDW0 */
+    virtual double getRpropDW0() const = 0;
+    /** @copybrief getRpropDW0 @see getRpropDW0 */
+    virtual void setRpropDW0(double val) = 0;
 
     /** RPROP: Increase factor \f$\eta^+\f$.
     It must be \>1. Default value is 1.2.*/
-    CV_PURE_PROPERTY(double, RpropDWPlus)
+    /** @see setRpropDWPlus */
+    virtual double getRpropDWPlus() const = 0;
+    /** @copybrief getRpropDWPlus @see getRpropDWPlus */
+    virtual void setRpropDWPlus(double val) = 0;
 
     /** RPROP: Decrease factor \f$\eta^-\f$.
     It must be \<1. Default value is 0.5.*/
-    CV_PURE_PROPERTY(double, RpropDWMinus)
+    /** @see setRpropDWMinus */
+    virtual double getRpropDWMinus() const = 0;
+    /** @copybrief getRpropDWMinus @see getRpropDWMinus */
+    virtual void setRpropDWMinus(double val) = 0;
 
     /** RPROP: Update-values lower limit \f$\Delta_{min}\f$.
     It must be positive. Default value is FLT_EPSILON.*/
-    CV_PURE_PROPERTY(double, RpropDWMin)
+    /** @see setRpropDWMin */
+    virtual double getRpropDWMin() const = 0;
+    /** @copybrief getRpropDWMin @see getRpropDWMin */
+    virtual void setRpropDWMin(double val) = 0;
 
     /** RPROP: Update-values upper limit \f$\Delta_{max}\f$.
     It must be \>1. Default value is 50.*/
-    CV_PURE_PROPERTY(double, RpropDWMax)
+    /** @see setRpropDWMax */
+    virtual double getRpropDWMax() const = 0;
+    /** @copybrief getRpropDWMax @see getRpropDWMax */
+    virtual void setRpropDWMax(double val) = 0;
 
     /** possible activation functions */
     enum ActivationFunctions {
@@ -1313,29 +1430,47 @@ public:
 
 @sa @ref ml_intro_lr
  */
-class CV_EXPORTS LogisticRegression : public StatModel
+class CV_EXPORTS_W LogisticRegression : public StatModel
 {
 public:
 
     /** Learning rate. */
-    CV_PURE_PROPERTY(double, LearningRate)
+    /** @see setLearningRate */
+    CV_WRAP virtual double getLearningRate() const = 0;
+    /** @copybrief getLearningRate @see getLearningRate */
+    CV_WRAP virtual void setLearningRate(double val) = 0;
 
     /** Number of iterations. */
-    CV_PURE_PROPERTY(int, Iterations)
+    /** @see setIterations */
+    CV_WRAP virtual int getIterations() const = 0;
+    /** @copybrief getIterations @see getIterations */
+    CV_WRAP virtual void setIterations(int val) = 0;
 
     /** Kind of regularization to be applied. See LogisticRegression::RegKinds. */
-    CV_PURE_PROPERTY(int, Regularization)
+    /** @see setRegularization */
+    CV_WRAP virtual int getRegularization() const = 0;
+    /** @copybrief getRegularization @see getRegularization */
+    CV_WRAP virtual void setRegularization(int val) = 0;
 
     /** Kind of training method used. See LogisticRegression::Methods. */
-    CV_PURE_PROPERTY(int, TrainMethod)
+    /** @see setTrainMethod */
+    CV_WRAP virtual int getTrainMethod() const = 0;
+    /** @copybrief getTrainMethod @see getTrainMethod */
+    CV_WRAP virtual void setTrainMethod(int val) = 0;
 
     /** Specifies the number of training samples taken in each step of Mini-Batch Gradient
     Descent. Will only be used if using LogisticRegression::MINI_BATCH training algorithm. It
     has to take values less than the total number of training samples. */
-    CV_PURE_PROPERTY(int, MiniBatchSize)
+    /** @see setMiniBatchSize */
+    CV_WRAP virtual int getMiniBatchSize() const = 0;
+    /** @copybrief getMiniBatchSize @see getMiniBatchSize */
+    CV_WRAP virtual void setMiniBatchSize(int val) = 0;
 
     /** Termination criteria of the algorithm. */
-    CV_PURE_PROPERTY(TermCriteria, TermCriteria)
+    /** @see setTermCriteria */
+    CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
+    /** @copybrief getTermCriteria @see getTermCriteria */
+    CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
 
     //! Regularization kinds
     enum RegKinds {
@@ -1357,20 +1492,20 @@ public:
     @param results Predicted labels as a column matrix of type CV_32S.
     @param flags Not used.
      */
-    virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
+    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
 
     /** @brief This function returns the trained paramters arranged across rows.
 
     For a two class classifcation problem, it returns a row matrix. It returns learnt paramters of
     the Logistic Regression as a matrix of type CV_32F.
      */
-    virtual Mat get_learnt_thetas() const = 0;
+    CV_WRAP virtual Mat get_learnt_thetas() const = 0;
 
     /** @brief Creates empty model.
 
     Creates Logistic Regression model with parameters given.
      */
-    static Ptr<LogisticRegression> create();
+    CV_WRAP static Ptr<LogisticRegression> create();
 };
 
 /****************************************************************************************\
diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp
index 8bed11763..449eb8dcd 100644
--- a/modules/ml/src/svm.cpp
+++ b/modules/ml/src/svm.cpp
@@ -1669,13 +1669,13 @@ public:
         Mat samples = data->getTrainSamples();
         Mat responses;
         bool is_classification = false;
-        Mat class_labels0 = class_labels;
         int class_count = (int)class_labels.total();
 
         if( svmType == C_SVC || svmType == NU_SVC )
         {
             responses = data->getTrainNormCatResponses();
             class_labels = data->getClassLabels();
+            class_count = (int)class_labels.total();
             is_classification = true;
 
             vector<int> temp_class_labels;
@@ -1755,8 +1755,9 @@ public:
         Mat temp_train_responses(train_sample_count, 1, rtype);
         Mat temp_test_responses;
 
+        // If grid.minVal == grid.maxVal, this will allow one and only one pass through the loop with params.var = grid.minVal.
         #define FOR_IN_GRID(var, grid) \
-            for( params.var = grid.minVal; params.var == grid.minVal || params.var < grid.maxVal; params.var *= grid.logStep )
+            for( params.var = grid.minVal; params.var == grid.minVal || params.var < grid.maxVal; params.var = (grid.minVal == grid.maxVal) ? grid.maxVal + 1 : params.var * grid.logStep )
 
         FOR_IN_GRID(C, C_grid)
         FOR_IN_GRID(gamma, gamma_grid)
@@ -1814,7 +1815,6 @@ public:
         }
 
         params = best_params;
-        class_labels = class_labels0;
         return do_train( samples, responses );
     }
 
diff --git a/modules/ml/src/tree.cpp b/modules/ml/src/tree.cpp
index 537728336..143e1fb91 100644
--- a/modules/ml/src/tree.cpp
+++ b/modules/ml/src/tree.cpp
@@ -286,7 +286,14 @@ int DTreesImpl::addTree(const vector<int>& sidx )
                 int ssize = getSubsetSize(split.varIdx);
                 split.subsetOfs = (int)subsets.size();
                 subsets.resize(split.subsetOfs + ssize);
-                memcpy(&subsets[split.subsetOfs], &w->wsubsets[wsplit.subsetOfs], ssize*sizeof(int));
+                // Guard the copy: when ssize == 0 the resize above adds no elements,
+                // so &subsets[split.subsetOfs] would index one past the end of the
+                // vector. Skipping the call keeps the access in range and also
+                // avoids a pointless zero-length memcpy.
+                if(ssize > 0)
+                {
+                    memcpy(&subsets[split.subsetOfs], &w->wsubsets[wsplit.subsetOfs], ssize*sizeof(int));
+                }
             }
             node.split = (int)splits.size();
             splits.push_back(split);
diff --git a/modules/ml/test/test_svmtrainauto.cpp b/modules/ml/test/test_svmtrainauto.cpp
new file mode 100644
index 000000000..918d2b711
--- /dev/null
+++ b/modules/ml/test/test_svmtrainauto.cpp
@@ -0,0 +1,89 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace cv;
+using namespace std;
+using cv::ml::SVM;
+using cv::ml::TrainData;
+
+//--------------------------------------------------------------------------------------------
+class CV_SVMTrainAutoTest : public cvtest::BaseTest {  // test case for SVM::trainAuto(); the scenario lives in run()
+public:
+    CV_SVMTrainAutoTest() {}
+protected:
+    virtual void run( int start_from );  // test body; the definition ignores start_from
+};
+
+void CV_SVMTrainAutoTest::run( int /*start_from*/ )  // trains on two shifted point clouds, then checks one prediction per class
+{
+    int datasize = 100;
+    cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
+    cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
+    // Constant seed, so the generated samples do not vary between runs.
+    RNG rng(0);
+    for (int i = 0; i < datasize; ++i)
+    {
+        int response = rng.uniform(0, 2);  // Random from {0, 1}.
+        samples.at<float>( i, 0 ) = rng.uniform(0.f, 0.5f) + response * 0.5f;  // class 1 is class 0 shifted up by 0.5
+        samples.at<float>( i, 1 ) = rng.uniform(0.f, 0.5f) + response * 0.5f;
+        responses.at<int>( i, 0 ) = response;
+    }
+    // Each row of samples is one training sample (ROW_SAMPLE layout).
+    cv::Ptr<TrainData> data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
+    cv::Ptr<SVM> svm = SVM::create();
+    svm->trainAuto( data, 10 );  // 10-fold cross validation.
+    // Probe one point in the middle of each class's value range.
+    float test_data0[2] = {0.25f, 0.25f};
+    cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 );
+    float result0 = svm->predict( test_point0 );
+    float test_data1[2] = {0.75f, 0.75f};
+    cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 );
+    float result1 = svm->predict( test_point1 );
+    // Each prediction must equal its class label (0 or 1) to within 0.001.
+    if ( fabs( result0 - 0 ) > 0.001 || fabs( result1 - 1 ) > 0.001 )
+    {
+        ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
+    }
+}
+
+TEST(ML_SVM, trainauto) { CV_SVMTrainAutoTest test; test.safe_run(); }  // runs the test case above through safe_run()
diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp
index 1c0bbf1b0..bc1fedc80 100644
--- a/modules/objdetect/src/cascadedetect.cpp
+++ b/modules/objdetect/src/cascadedetect.cpp
@@ -555,6 +555,7 @@ HaarEvaluator::HaarEvaluator()
     localSize = Size(4, 2);
     lbufSize = Size(0, 0);
     nchannels = 0;
+    tofs = 0;
 }
 
 HaarEvaluator::~HaarEvaluator()
@@ -617,8 +618,7 @@ Ptr<FeatureEvaluator> HaarEvaluator::clone() const
 void HaarEvaluator::computeChannels(int scaleIdx, InputArray img)
 {
     const ScaleData& s = scaleData->at(scaleIdx);
-    tofs = (int)sbufSize.area();
-    sqofs = hasTiltedFeatures ? tofs*2 : tofs;
+    sqofs = hasTiltedFeatures ? sbufSize.area() * 2 : sbufSize.area();
 
     if (img.isUMat())
     {
@@ -659,6 +659,9 @@ void HaarEvaluator::computeChannels(int scaleIdx, InputArray img)
 
 void HaarEvaluator::computeOptFeatures()
 {
+    if (hasTiltedFeatures)
+        tofs = sbufSize.area();
+
     int sstep = sbufSize.width;
     CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sstep );
 
@@ -676,6 +679,10 @@ void HaarEvaluator::computeOptFeatures()
     copyVectorToUMat(*optfeatures_lbuf, ufbuf);
 }
 
+bool HaarEvaluator::setImage(InputArray _image, const std::vector<float>& _scales){  // clears tofs, then defers to the base class
+    tofs = 0;  // computeOptFeatures() assigns tofs again only when hasTiltedFeatures is set
+    return FeatureEvaluator::setImage(_image, _scales);
+}
 
 bool HaarEvaluator::setWindow( Point pt, int scaleIdx )
 {
@@ -1268,7 +1275,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
         scales.push_back((float)factor);
     }
 
-    if( !featureEvaluator->setImage(gray, scales) )
+    if( scales.size() == 0 || !featureEvaluator->setImage(gray, scales) )
         return;
 
     // OpenCL code
diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp
index 4cbf3e9bf..156f0468d 100644
--- a/modules/objdetect/src/cascadedetect.hpp
+++ b/modules/objdetect/src/cascadedetect.hpp
@@ -347,6 +347,7 @@ public:
     virtual Ptr<FeatureEvaluator> clone() const;
     virtual int getFeatureType() const { return FeatureEvaluator::HAAR; }
 
+    virtual bool setImage(InputArray _image, const std::vector<float>& _scales);
     virtual bool setWindow(Point p, int scaleIdx);
     Rect getNormRect() const;
     int getSquaresOffset() const;
diff --git a/modules/objdetect/src/opencl/cascadedetect.cl b/modules/objdetect/src/opencl/cascadedetect.cl
index 854a7f617..7ab581a28 100644
--- a/modules/objdetect/src/opencl/cascadedetect.cl
+++ b/modules/objdetect/src/opencl/cascadedetect.cl
@@ -180,11 +180,11 @@ void runHaarClassifier(
                         int4 ofs = f->ofs[0];
                         sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
                         ofs = f->ofs[1];
-                        sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
+                        sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
                         if( weight.z > 0 )
                         {
                             ofs = f->ofs[2];
-                            sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
+                            sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
                         }
 
                         s += (sval < st.y*nf) ? st.z : st.w;
@@ -204,11 +204,11 @@ void runHaarClassifier(
 
                             sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
                             ofs = f->ofs[1];
-                            sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
+                            sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
                             if( weight.z > 0 )
                             {
                                 ofs = f->ofs[2];
-                                sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
+                                sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
                             }
 
                             idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
@@ -281,11 +281,12 @@ void runHaarClassifier(
                             int4 ofs = f->ofs[0];
                             float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
                             ofs = f->ofs[1];
-                            sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
+                            sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
                             //if( weight.z > 0 )
+                            if( fabs(weight.z) > 0 )
                             {
                                 ofs = f->ofs[2];
-                                sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
+                                sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
                             }
 
                             partsum += (sval < st.y*nf) ? st.z : st.w;
@@ -303,11 +304,11 @@ void runHaarClassifier(
 
                                 float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
                                 ofs = f->ofs[1];
-                                sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
+                                sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval);
                                 if( weight.z > 0 )
                                 {
                                     ofs = f->ofs[2];
-                                    sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
+                                    sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval);
                                 }
 
                                 idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
diff --git a/modules/photo/include/opencv2/photo.hpp b/modules/photo/include/opencv2/photo.hpp
index 2d1087e89..c651b9ee3 100644
--- a/modules/photo/include/opencv2/photo.hpp
+++ b/modules/photo/include/opencv2/photo.hpp
@@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
 <http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
 optimizations. Noise expected to be a gaussian white noise
 
-@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
+@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
 @param dst Output image with the same size and type as src .
 @param templateWindowSize Size in pixels of the template patch that is used to compute weights.
 Should be odd. Recommended value 7 pixels
@@ -138,6 +138,35 @@ parameter.
 CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
         int templateWindowSize = 7, int searchWindowSize = 21);
 
+/** @brief Perform image denoising using Non-local Means Denoising algorithm
+<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
+optimizations. The noise is expected to be Gaussian white noise.
+
+@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
+2-channel, 3-channel or 4-channel image.
+@param dst Output image with the same size and type as src .
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute the weighted average
+for a given pixel. Should be odd. Affects performance linearly: the greater searchWindowSize is,
+the greater the denoising time. Recommended value is 21 pixels.
+@param h Array of parameters regulating filter strength, either one
+parameter applied to all channels or one per channel in dst. Big h value
+perfectly removes noise but also removes image details, smaller h
+value preserves details but also preserves some noise
+@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
+
+This function is expected to be applied to grayscale images. For colored images look at
+fastNlMeansDenoisingColored. Advanced usage of this function can be manual denoising of colored
+image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting
+image to CIELAB colorspace and then separately denoise L and AB components with different h
+parameter.
+ */
+CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
+                                        const std::vector<float>& h,
+                                        int templateWindowSize = 7, int searchWindowSize = 21,
+                                        int normType = NORM_L2);
+
 /** @brief Modification of fastNlMeansDenoising function for colored images
 
 @param src Input 8-bit 3-channel image.
@@ -165,7 +194,35 @@ captured in small period of time. For example video. This version of the functio
 images or for manual manipulation with colorspaces. For more details see
 <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
 
-@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should
+@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
+4-channel images sequence. All images should have the same type and
+size.
+@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
+@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
+be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
+imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
+srcImgs[imgToDenoiseIndex] image.
+@param dst Output image with the same size and type as srcImgs images.
+@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
+Should be odd. Recommended value 7 pixels
+@param searchWindowSize Size in pixels of the window that is used to compute the weighted average
+for a given pixel. Should be odd. Affects performance linearly: the greater searchWindowSize is,
+the greater the denoising time. Recommended value is 21 pixels.
+@param h Parameter regulating filter strength. Bigger h value
+perfectly removes noise but also removes image details, smaller h
+value preserves details but also preserves some noise
+ */
+CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
+        int imgToDenoiseIndex, int temporalWindowSize,
+        float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
+
+/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been
+captured in small period of time. For example video. This version of the function is for grayscale
+images or for manual manipulation with colorspaces. For more details see
+<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
+
+@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
+2-channel, 3-channel or 4-channel images sequence. All images should
 have the same type and size.
 @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
 @param temporalWindowSize Number of surrounding images to use for target image denoising. Should
@@ -178,13 +235,17 @@ Should be odd. Recommended value 7 pixels
 @param searchWindowSize Size in pixels of the window that is used to compute weighted average for
 given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
 denoising time. Recommended value 21 pixels
-@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly
-removes noise but also removes image details, smaller h value preserves details but also preserves
-some noise
+@param h Array of parameters regulating filter strength, either one
+parameter applied to all channels or one per channel in dst. Big h value
+perfectly removes noise but also removes image details, smaller h
+value preserves details but also preserves some noise
+@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
  */
 CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
-        int imgToDenoiseIndex, int temporalWindowSize,
-        float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
+                                             int imgToDenoiseIndex, int temporalWindowSize,
+                                             const std::vector<float>& h,
+                                             int templateWindowSize = 7, int searchWindowSize = 21,
+                                             int normType = NORM_L2);
 
 /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences
 
diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp
index b4767a738..c68d09b92 100644
--- a/modules/photo/src/denoising.cpp
+++ b/modules/photo/src/denoising.cpp
@@ -45,42 +45,115 @@
 #include "fast_nlmeans_multi_denoising_invoker.hpp"
 #include "fast_nlmeans_denoising_opencl.hpp"
 
+template<typename ST, typename IT, typename UIT, typename D>
+static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<float>& h,
+                                   int templateWindowSize, int searchWindowSize)
+{
+    // Row-parallel denoising of src into dst. The invoker's last template argument is int
+    // when h has one value (and always for 1 channel), else Vec2i/Vec3i/Vec4i by channel count.
+    const int channels = CV_MAT_CN(src.type());
+    const bool singleH = ((int)h.size() == 1);
+    const cv::Range rows(0, src.rows);
+    if (channels == 1)
+    {
+        parallel_for_(rows, FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
+                                src, dst, templateWindowSize, searchWindowSize, &h[0]));
+    }
+    else if (channels == 2)
+    {
+        if (singleH)
+            parallel_for_(rows, FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
+                                    src, dst, templateWindowSize, searchWindowSize, &h[0]));
+        else
+            parallel_for_(rows, FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
+                                    src, dst, templateWindowSize, searchWindowSize, &h[0]));
+    }
+    else if (channels == 3)
+    {
+        if (singleH)
+            parallel_for_(rows, FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
+                                    src, dst, templateWindowSize, searchWindowSize, &h[0]));
+        else
+            parallel_for_(rows, FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
+                                    src, dst, templateWindowSize, searchWindowSize, &h[0]));
+    }
+    else if (channels == 4)
+    {
+        if (singleH)
+            parallel_for_(rows, FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
+                                    src, dst, templateWindowSize, searchWindowSize, &h[0]));
+        else
+            parallel_for_(rows, FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
+                                    src, dst, templateWindowSize, searchWindowSize, &h[0]));
+    }
+    else
+    {
+        CV_Error(Error::StsBadArg,
+                 "Unsupported number of channels! Only 1, 2, 3, and 4 are supported");
+    }
+}
+
 void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
                                int templateWindowSize, int searchWindowSize)
 {
+    fastNlMeansDenoising(_src, _dst, std::vector<float>(1, h),
+                         templateWindowSize, searchWindowSize);
+}
+
+void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector<float>& h,
+                               int templateWindowSize, int searchWindowSize, int normType)
+{
+    int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    CV_Assert(hn == 1 || hn == cn);
+
     Size src_size = _src.size();
     CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) &&
                src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes
-               ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize))
+               ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn,
+                                        templateWindowSize, searchWindowSize, normType))
 
     Mat src = _src.getMat();
     _dst.create(src_size, src.type());
     Mat dst = _dst.getMat();
 
+    switch (normType) {
+        case NORM_L2:
 #ifdef HAVE_TEGRA_OPTIMIZATION
-    if(tegra::useTegra() && tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize))
-        return;
+            if(hn == 1 && tegra::useTegra() &&
+               tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize))
+                return;
 #endif
-
-    switch (src.type()) {
-        case CV_8U:
-            parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<uchar>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+            switch (depth) {
+                case CV_8U:
+                    fastNlMeansDenoising_<uchar, int, unsigned, DistSquared>(src, dst, h,
+                                                                             templateWindowSize,
+                                                                             searchWindowSize);
+                    break;
+                default:
+                    CV_Error(Error::StsBadArg,
+                             "Unsupported depth! Only CV_8U is supported for NORM_L2");
+            }
             break;
-        case CV_8UC2:
-            parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec2b>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
-            break;
-        case CV_8UC3:
-            parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec3b>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+        case NORM_L1:
+            switch (depth) {
+                case CV_8U:
+                    fastNlMeansDenoising_<uchar, int, unsigned, DistAbs>(src, dst, h,
+                                                                         templateWindowSize,
+                                                                         searchWindowSize);
+                    break;
+                case CV_16U:
+                    fastNlMeansDenoising_<ushort, int64, uint64, DistAbs>(src, dst, h,
+                                                                          templateWindowSize,
+                                                                          searchWindowSize);
+                    break;
+                default:
+                    CV_Error(Error::StsBadArg,
+                             "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
+            }
             break;
         default:
             CV_Error(Error::StsBadArg,
-                "Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported");
+                     "Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
     }
 }
 
@@ -92,7 +165,7 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
     Size src_size = _src.size();
     if (type != CV_8UC3 && type != CV_8UC4)
     {
-        CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!");
+        CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!");
         return;
     }
 
@@ -108,8 +181,8 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
     Mat src_lab;
     cvtColor(src, src_lab, COLOR_LBGR2Lab);
 
-    Mat l(src_size, CV_8U);
-    Mat ab(src_size, CV_8UC2);
+    Mat l(src_size, CV_MAKE_TYPE(depth, 1));
+    Mat ab(src_size, CV_MAKE_TYPE(depth, 2));
     Mat l_ab[] = { l, ab };
     int from_to[] = { 0,0, 1,1, 2,2 };
     mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);
@@ -157,9 +230,76 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
         }
 }
 
+template<typename ST, typename IT, typename UIT, typename D>
+static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& dst,
+                                        int imgToDenoiseIndex, int temporalWindowSize,
+                                        const std::vector<float>& h,
+                                        int templateWindowSize, int searchWindowSize)
+{
+    // Denoises srcImgs[imgToDenoiseIndex] into dst. Dispatch is on the channel count only:
+    // ST already fixes the sample depth, so CV_16U input (ST = ushort) is accepted too.
+    int hn = (int)h.size();  // one value shared by all channels, or one value per channel
+    switch (CV_MAT_CN(srcImgs[0].type())) {
+        case 1:
+            parallel_for_(cv::Range(0, srcImgs[0].rows),
+                          FastNlMeansMultiDenoisingInvoker<ST, IT, UIT, D, int>(
+                              srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                              dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        case 2:
+            if (hn == 1)
+                parallel_for_(cv::Range(0, srcImgs[0].rows),
+                              FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
+                                  srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                                  dst, templateWindowSize, searchWindowSize, &h[0]));
+            else
+                parallel_for_(cv::Range(0, srcImgs[0].rows),
+                              FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
+                                  srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                                  dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        case 3:
+            if (hn == 1)
+                parallel_for_(cv::Range(0, srcImgs[0].rows),
+                              FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
+                                  srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                                  dst, templateWindowSize, searchWindowSize, &h[0]));
+            else
+                parallel_for_(cv::Range(0, srcImgs[0].rows),
+                              FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
+                                  srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                                  dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        case 4:
+            if (hn == 1)
+                parallel_for_(cv::Range(0, srcImgs[0].rows),
+                              FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
+                                  srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                                  dst, templateWindowSize, searchWindowSize, &h[0]));
+            else
+                parallel_for_(cv::Range(0, srcImgs[0].rows),
+                              FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
+                                  srcImgs, imgToDenoiseIndex, temporalWindowSize,
+                                  dst, templateWindowSize, searchWindowSize, &h[0]));
+            break;
+        default:
+            CV_Error(Error::StsBadArg,
+                "Unsupported number of channels! Only 1, 2, 3, and 4 are supported");
+    }
+}
+
 void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
                                     int imgToDenoiseIndex, int temporalWindowSize,
                                     float h, int templateWindowSize, int searchWindowSize)
+{
+    fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize,
+                              std::vector<float>(1, h), templateWindowSize, searchWindowSize);
+}
+
+void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
+                                    int imgToDenoiseIndex, int temporalWindowSize,
+                                    const std::vector<float>& h,
+                                    int templateWindowSize, int searchWindowSize, int normType)
 {
     std::vector<Mat> srcImgs;
     _srcImgs.getMatVector(srcImgs);
@@ -168,32 +308,52 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
         srcImgs, imgToDenoiseIndex,
         temporalWindowSize, templateWindowSize, searchWindowSize);
 
+    int hn = (int)h.size();
+    int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    CV_Assert(hn == 1 || hn == cn);
+
     _dst.create(srcImgs[0].size(), srcImgs[0].type());
     Mat dst = _dst.getMat();
 
-    switch (srcImgs[0].type())
-    {
-        case CV_8U:
-            parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<uchar>(
-                    srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+    switch (normType) {
+        case NORM_L2:
+            switch (depth) {
+                case CV_8U:
+                    fastNlMeansDenoisingMulti_<uchar, int, unsigned,
+                                               DistSquared>(srcImgs, dst,
+                                                            imgToDenoiseIndex, temporalWindowSize,
+                                                            h,
+                                                            templateWindowSize, searchWindowSize);
+                    break;
+                default:
+                    CV_Error(Error::StsBadArg,
+                             "Unsupported depth! Only CV_8U is supported for NORM_L2");
+            }
             break;
-        case CV_8UC2:
-            parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
-                    srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
-            break;
-        case CV_8UC3:
-            parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
-                    srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+        case NORM_L1:
+            switch (depth) {
+                case CV_8U:
+                    fastNlMeansDenoisingMulti_<uchar, int, unsigned,
+                                               DistAbs>(srcImgs, dst,
+                                                        imgToDenoiseIndex, temporalWindowSize,
+                                                        h,
+                                                        templateWindowSize, searchWindowSize);
+                    break;
+                case CV_16U:
+                    fastNlMeansDenoisingMulti_<ushort, int64, uint64,
+                                               DistAbs>(srcImgs, dst,
+                                                        imgToDenoiseIndex, temporalWindowSize,
+                                                        h,
+                                                        templateWindowSize, searchWindowSize);
+                    break;
+                default:
+                    CV_Error(Error::StsBadArg,
+                             "Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
+            }
             break;
         default:
             CV_Error(Error::StsBadArg,
-                "Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
+                     "Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
     }
 }
 
@@ -212,9 +372,10 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
     _dst.create(srcImgs[0].size(), srcImgs[0].type());
     Mat dst = _dst.getMat();
 
+    int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type);
     int src_imgs_size = static_cast<int>(srcImgs.size());
 
-    if (srcImgs[0].type() != CV_8UC3)
+    if (type != CV_8UC3)
     {
         CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
         return;
@@ -228,9 +389,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
     std::vector<Mat> ab(src_imgs_size);
     for (int i = 0; i < src_imgs_size; i++)
     {
-        src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
-        l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
-        ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
+        src_lab[i] = Mat::zeros(srcImgs[0].size(), type);
+        l[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 1));
+        ab[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 2));
         cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab);
 
         Mat l_ab[] = { l[i], ab[i] };
diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
index b8f5a0392..cfa9826ae 100644
--- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
@@ -50,13 +50,13 @@
 
 using namespace cv;
 
-template <typename T>
+template <typename T, typename IT, typename UIT, typename D, typename WT>
 struct FastNlMeansDenoisingInvoker :
         public ParallelLoopBody
 {
 public:
     FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
-        int template_window_size, int search_window_size, const float h);
+        int template_window_size, int search_window_size, const float *h);
 
     void operator() (const Range& range) const;
 
@@ -75,9 +75,9 @@ private:
     int template_window_half_size_;
     int search_window_half_size_;
 
-    int fixed_point_mult_;
+    typename pixelInfo<WT>::sampleType fixed_point_mult_;
     int almost_template_window_size_sq_bin_shift_;
-    std::vector<int> almost_dist2weight_;
+    std::vector<WT> almost_dist2weight_;
 
     void calcDistSumsForFirstElementInRow(
         int i, Array2d<int>& dist_sums,
@@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value)
     return p;
 }
 
-template <class T>
-FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
     const Mat& src, Mat& dst,
     int template_window_size,
     int search_window_size,
-    const float h) :
+    const float *h) :
     src_(src), dst_(dst)
 {
-    CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b
+    CV_Assert(src.channels() == pixelInfo<T>::channels);
 
     template_window_half_size_ = template_window_size / 2;
     search_window_half_size_   = search_window_size   / 2;
@@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
     border_size_ = search_window_half_size_ + template_window_half_size_;
     copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
 
-    const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255;
-    fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
+    const IT max_estimate_sum_value =
+        (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
+    fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
+                                          pixelInfo<WT>::sampleMax());
 
     // precalc weight for every possible l2 dist between blocks
     // additional optimization of precalced weights to replace division(averaging) by binary shift
@@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
     almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
     double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
 
-    int max_dist = 255 * 255 * sizeof(T);
+    int max_dist = D::template maxDist<T>();
     int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
     almost_dist2weight_.resize(almost_max_dist);
 
-    const double WEIGHT_THRESHOLD = 0.001;
     for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
     {
         double dist = almost_dist * almost_dist2actual_dist_multiplier;
-        int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
-
-        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
-            weight = 0;
-
-        almost_dist2weight_[almost_dist] = weight;
+        almost_dist2weight_[almost_dist] =
+            D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
     }
-    CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
 
     // additional optimization init end
     if (dst_.empty())
         dst_ = Mat::zeros(src_.size(), src_.type());
 }
 
-template <class T>
-void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
 {
     int row_from = range.start;
     int row_to = range.end - 1;
@@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
                             dist_sums_row[x] -= col_dist_sums_row[x];
 
                             int bx = start_bx + x;
-                            col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
+                            col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
 
                             dist_sums_row[x] += col_dist_sums_row[x];
                             up_col_dist_sums_row[x] = col_dist_sums_row[x];
@@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
             }
 
             // calc weights
-            int estimation[3], weights_sum = 0;
-            for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
+            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
+            for (int channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
                 estimation[channel_num] = 0;
+            for (int channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
+                weights_sum[channel_num] = 0;
 
             for (int y = 0; y < search_window_size_; y++)
             {
@@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
                 for (int x = 0; x < search_window_size_; x++)
                 {
                     int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
-                    int weight = almost_dist2weight_[almostAvgDist];
-                    weights_sum += weight;
-
+                    WT weight = almost_dist2weight_[almostAvgDist];
                     T p = cur_row_ptr[border_size_ + search_window_x + x];
-                    incWithWeight(estimation, weight, p);
+                    incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
                 }
             }
 
-            for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
-                estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum;
-
-            dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
+            divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
+                                                                                      weights_sum);
+            dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
         }
     }
 }
 
-template <class T>
-inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
     int i,
     Array2d<int>& dist_sums,
     Array3d<int>& col_dist_sums,
@@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
             for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
                 for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
                 {
-                    int dist = calcDist<T>(extended_src_,
+                    int dist = D::template calcDist<T>(extended_src_,
                         border_size_ + i + ty, border_size_ + j + tx,
                         border_size_ + start_y + ty, border_size_ + start_x + tx);
 
@@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
         }
 }
 
-template <class T>
-inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
     int i, int j, int first_col_num,
     Array2d<int>& dist_sums,
     Array3d<int>& col_dist_sums,
@@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
             int by = start_by + y;
             int bx = start_bx + x;
             for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
-                col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
+                col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
 
             dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
             up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
index ab7db5d2d..d8eb34417 100644
--- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
@@ -44,118 +44,446 @@
 
 using namespace cv;
 
-template <typename T> static inline int calcDist(const T a, const T b);
+// std::isnan is part of C++11 and is not supported by MSVS2010/2012, so a shim is supplied for MSVC older than 2013.
+#if defined _MSC_VER && _MSC_VER < 1800 /* MSVC 2013 */
+#include <float.h>
+namespace std {
+template <typename T> bool isnan(T value) { return _isnan(value) != 0; } // forwards to _isnan
+}
+#endif
 
-template <> inline int calcDist(const uchar a, const uchar b)
+template <typename T> struct pixelInfo_ // primary template: T itself is treated as a single-sample pixel
 {
-    return (a-b) * (a-b);
+    static const int channels = 1; // one channel per pixel
+    typedef T sampleType; // the sample type is the pixel type itself
+};
+
+template <typename ET, int n> struct pixelInfo_<Vec<ET, n> > // specialization for Vec<ET, n> pixels
+{
+    static const int channels = n; // channel count is the Vec length
+    typedef ET sampleType; // per-channel element type
+};
+
+template <typename T> struct pixelInfo: public pixelInfo_<T> // pixelInfo_<T> plus sample range/size helpers
+{
+    typedef typename pixelInfo_<T>::sampleType sampleType; // re-declared so it can be used unqualified below
+
+    static inline sampleType sampleMax() // std::numeric_limits<sampleType>::max()
+    {
+        return std::numeric_limits<sampleType>::max();
+    }
+
+    static inline sampleType sampleMin() // std::numeric_limits<sampleType>::min()
+    {
+        return std::numeric_limits<sampleType>::min();
+    }
+
+    static inline size_t sampleBytes() // sizeof one sample
+    {
+        return sizeof(sampleType);
+    }
+
+    static inline size_t sampleBits() // sampleBytes() * 8
+    {
+        return 8*sampleBytes();
+    }
+};
+
+class DistAbs // distance policy: sum of absolute per-channel differences (the NORM_L1 dispatch passes it as D)
+{
+    template <typename T> struct calcDist_ // scalar pixel: |a - b|
+    {
+        static inline int f(const T a, const T b)
+        {
+            return std::abs((int)(a-b));
+        }
+    };
+
+    template <typename ET> struct calcDist_<Vec<ET, 2> > // 2-channel pixel: sum of |a[c] - b[c]|
+    {
+        static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
+        {
+            return std::abs((int)(a[0]-b[0])) + std::abs((int)(a[1]-b[1]));
+        }
+    };
+
+    template <typename ET> struct calcDist_<Vec<ET, 3> > // 3-channel pixel: sum of |a[c] - b[c]|
+    {
+        static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
+        {
+            return
+                std::abs((int)(a[0]-b[0])) +
+                std::abs((int)(a[1]-b[1])) +
+                std::abs((int)(a[2]-b[2]));
+        }
+    };
+
+    template <typename ET> struct calcDist_<Vec<ET, 4> > // 4-channel pixel: sum of |a[c] - b[c]|
+    {
+        static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
+        {
+            return
+                std::abs((int)(a[0]-b[0])) +
+                std::abs((int)(a[1]-b[1])) +
+                std::abs((int)(a[2]-b[2])) +
+                std::abs((int)(a[3]-b[3]));
+        }
+    };
+
+    template <typename T, typename WT> struct calcWeight_ // scalar weight computed from h[0] only
+    {
+        static inline WT f(double dist, const float *h, WT fixed_point_mult)
+        {
+            double w = std::exp(-dist*dist / (h[0]*h[0] * pixelInfo<T>::channels)); // note: dist is squared here
+            if (std::isnan(w)) w = 1.0; // Handle h = 0.0
+
+            static const double WEIGHT_THRESHOLD = 0.001;
+            WT weight = (WT)cvRound(fixed_point_mult * w); // scale to fixed point
+            if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0; // zero out weights below the threshold
+
+            return weight;
+        }
+    };
+
+    template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> > // vector weight: element i uses h[i]
+    {
+        static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
+        {
+            Vec<ET, n> res;
+            for (int i=0; i<n; i++)
+                res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult); // scalar path with h advanced to entry i
+            return res;
+        }
+    };
+
+public:
+    template <typename T> static inline int calcDist(const T a, const T b) // distance between two pixel values
+    {
+        return calcDist_<T>::f(a, b);
+    }
+
+    template <typename T>
+    static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) // distance between m(i1,j1) and m(i2,j2)
+    {
+        const T a = m.at<T>(i1, j1);
+        const T b = m.at<T>(i2, j2);
+        return calcDist<T>(a,b);
+    }
+
+    template <typename T>
+    static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) // dist(a_down, b_down) - dist(a_up, b_up)
+    {
+        return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
+    }
+
+    template <typename T, typename WT>
+    static inline WT calcWeight(double dist, const float *h, // h: one entry for scalar WT, n entries for Vec<ET, n>
+                                typename pixelInfo<WT>::sampleType fixed_point_mult)
+    {
+        return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
+    }
+
+    template <typename T>
+    static inline int maxDist() // largest calcDist result for pixel type T: sampleMax() per channel
+    {
+        return (int)pixelInfo<T>::sampleMax() * pixelInfo<T>::channels;
+    }
+};
+
+class DistSquared // distance policy: sum of squared per-channel differences
+{
+    template <typename T> struct calcDist_ // scalar pixel: (a - b)^2
+    {
+        static inline int f(const T a, const T b)
+        {
+            return (int)(a-b) * (int)(a-b);
+        }
+    };
+
+    template <typename ET> struct calcDist_<Vec<ET, 2> > // 2-channel pixel: sum of (a[c] - b[c])^2
+    {
+        static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
+        {
+            return (int)(a[0]-b[0])*(int)(a[0]-b[0]) + (int)(a[1]-b[1])*(int)(a[1]-b[1]);
+        }
+    };
+
+    template <typename ET> struct calcDist_<Vec<ET, 3> > // 3-channel pixel: sum of (a[c] - b[c])^2
+    {
+        static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
+        {
+            return
+                (int)(a[0]-b[0])*(int)(a[0]-b[0]) +
+                (int)(a[1]-b[1])*(int)(a[1]-b[1]) +
+                (int)(a[2]-b[2])*(int)(a[2]-b[2]);
+        }
+    };
+
+    template <typename ET> struct calcDist_<Vec<ET, 4> > // 4-channel pixel: sum of (a[c] - b[c])^2
+    {
+        static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
+        {
+            return
+                (int)(a[0]-b[0])*(int)(a[0]-b[0]) +
+                (int)(a[1]-b[1])*(int)(a[1]-b[1]) +
+                (int)(a[2]-b[2])*(int)(a[2]-b[2]) +
+                (int)(a[3]-b[3])*(int)(a[3]-b[3]);
+        }
+    };
+
+    template <typename T> struct calcUpDownDist_ // scalar pixel: A*A - B*B in factored form
+    {
+        static inline int f(T a_up, T a_down, T b_up, T b_down)
+        {
+            int A = a_down - b_down;
+            int B = a_up - b_up;
+            return (A-B)*(A+B); // equals A*A - B*B
+        }
+    };
+
+    template <typename ET, int n> struct calcUpDownDist_<Vec<ET, n> > // Vec pixel: difference of two calcDist calls
+    {
+    private:
+        typedef Vec<ET, n> T;
+    public:
+        static inline int f(T a_up, T a_down, T b_up, T b_down)
+        {
+            return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
+        }
+    };
+
+    template <typename T, typename WT> struct calcWeight_ // scalar weight computed from h[0] only
+    {
+        static inline WT f(double dist, const float *h, WT fixed_point_mult)
+        {
+            double w = std::exp(-dist / (h[0]*h[0] * pixelInfo<T>::channels)); // dist is used as-is (not squared again)
+            if (std::isnan(w)) w = 1.0; // Handle h = 0.0
+
+            static const double WEIGHT_THRESHOLD = 0.001;
+            WT weight = (WT)cvRound(fixed_point_mult * w); // scale to fixed point
+            if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0; // zero out weights below the threshold
+
+            return weight;
+        }
+    };
+
+    template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> > // vector weight: element i uses h[i]
+    {
+        static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
+        {
+            Vec<ET, n> res;
+            for (int i=0; i<n; i++)
+                res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult); // scalar path with h advanced to entry i
+            return res;
+        }
+    };
+
+public:
+    template <typename T> static inline int calcDist(const T a, const T b) // distance between two pixel values
+    {
+        return calcDist_<T>::f(a, b);
+    }
+
+    template <typename T>
+    static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) // distance between m(i1,j1) and m(i2,j2)
+    {
+        const T a = m.at<T>(i1, j1);
+        const T b = m.at<T>(i2, j2);
+        return calcDist<T>(a,b);
+    }
+
+    template <typename T>
+    static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) // dist(a_down, b_down) - dist(a_up, b_up)
+    {
+        return calcUpDownDist_<T>::f(a_up, a_down, b_up, b_down);
+    }
+
+    template <typename T, typename WT>
+    static inline WT calcWeight(double dist, const float *h, // h: one entry for scalar WT, n entries for Vec<ET, n>
+                                typename pixelInfo<WT>::sampleType fixed_point_mult)
+    {
+        return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
+    }
+
+    template <typename T>
+    static inline int maxDist() // NOTE: computed in int; sampleMax()^2 exceeds a 32-bit int for 16-bit samples
+    {
+        return (int)pixelInfo<T>::sampleMax() * (int)pixelInfo<T>::sampleMax() *
+            pixelInfo<T>::channels;
+    }
+};
+
+template <typename T, typename IT, typename WT> struct incWithWeight_ // primary: scalar pixel, scalar weight
+{
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, T p)
+    {
+        estimation[0] += static_cast<IT>(weight) * p; // accumulate the weighted sample
+        weights_sum[0] += static_cast<IT>(weight);    // accumulate the weight
+    }
+};
+
+template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 2>, IT, WT> // 2 channels, one weight
+{
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 2> p)
+    {
+        for (int c = 0; c < 2; c++)
+            estimation[c] += static_cast<IT>(weight) * p[c];
+        weights_sum[0] += static_cast<IT>(weight);
+    }
+};
+
+template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 3>, IT, WT>
+{
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 3> p)
+    {
+        // Three channels share one scalar weight, so only weights_sum[0] is updated.
+        for (int c = 0; c < 3; c++)
+            estimation[c] += static_cast<IT>(weight) * p[c];
+        weights_sum[0] += static_cast<IT>(weight);
+    }
+};
+
+template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 4>, IT, WT>
+{
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 4> p)
+    {
+        // Four channels share one scalar weight, so only weights_sum[0] is updated.
+        const IT w = static_cast<IT>(weight);
+        for (int c = 0; c < 4; c++)
+            estimation[c] += w * p[c];
+        weights_sum[0] += w;
+    }
+};
+
+template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 2>, IT, Vec<EW, 2> > // per-channel weights
+{
+    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 2> weight, Vec<ET, 2> p)
+    {
+        for (int c = 0; c < 2; c++)
+            estimation[c] += static_cast<IT>(weight[c]) * p[c];
+        for (int c = 0; c < 2; c++)
+            weights_sum[c] += static_cast<IT>(weight[c]);
+    }
+};
+
+template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 3>, IT, Vec<EW, 3> >
+{
+    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 3> weight, Vec<ET, 3> p)
+    {
+        // Each channel is scaled by its own weight ...
+        for (int c = 0; c < 3; c++)
+            estimation[c] += static_cast<IT>(weight[c]) * p[c];
+        // ... and each weight goes into its own running sum.
+        for (int c = 0; c < 3; c++)
+            weights_sum[c] += static_cast<IT>(weight[c]);
+    }
+};
+
+template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 4>, IT, Vec<EW, 4> >
+{
+    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 4> weight, Vec<ET, 4> p)
+    {
+        // Per-channel weights: channel c of the pixel is scaled by weight[c]
+        // before being added to estimation[c].
+        for (int c = 0; c < 4; c++)
+            estimation[c] += static_cast<IT>(weight[c]) * p[c];
+        // The weights are accumulated separately, one running sum per channel,
+        // after all four estimation entries have been updated.
+        for (int c = 0; c < 4; c++)
+            weights_sum[c] += static_cast<IT>(weight[c]);
+    }
+};
+
+template <typename T, typename IT, typename WT> // T: pixel type, IT: accumulator type, WT: weight type
+static inline void incWithWeight(IT* estimation, IT* weights_sum, WT weight, T p)
+{
+    incWithWeight_<T, IT, WT>::f(estimation, weights_sum, weight, p); // dispatch to the matching specialization
 }
 
-template <> inline int calcDist(const Vec2b a, const Vec2b b)
+template <typename IT, typename UIT, int nc, int nw> struct divByWeightsSum_ // primary template: f is only declared
 {
-    return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]);
+    static inline void f(IT* estimation, IT* weights_sum); // defined only by the partial specializations
+};
+
+template <typename IT, typename UIT> struct divByWeightsSum_<IT, UIT, 1, 1> // one channel, one weight sum
+{
+    static inline void f(IT* estimation, IT* weights_sum)
+    {
+        estimation[0] = (static_cast<UIT>(estimation[0]) + weights_sum[0]/2) / weights_sum[0]; // + sum/2 rounds the quotient
+    }
+};
+
+template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, 1> // n channels, one shared weight sum
+{
+    static inline void f(IT* estimation, IT* weights_sum)
+    {
+        for (int i = 0; i < n; i++) // int index: n is a signed int template parameter
+            estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[0]/2) / weights_sum[0];
+    }
+};
+
+template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, n> // n channels, one weight sum each
+{
+    static inline void f(IT* estimation, IT* weights_sum)
+    {
+        for (int i = 0; i < n; i++) // int index: n is a signed int template parameter
+            estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[i]/2) / weights_sum[i];
+    }
+};
+
+template <typename IT, typename UIT, int nc, int nw> // nc: estimation entries, nw: weights_sum entries
+static inline void divByWeightsSum(IT* estimation, IT* weights_sum)
+{
+    divByWeightsSum_<IT, UIT, nc, nw>::f(estimation, weights_sum); // dispatch on the (nc, nw) pair
 }
 
-template <> inline int calcDist(const Vec3b a, const Vec3b b)
+template <typename T, typename IT> struct saturateCastFromArray_ // primary: scalar pixel built from estimation[0]
 {
-    return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]);
-}
+    static inline T f(IT* estimation)
+    {
+        return saturate_cast<T>(estimation[0]); // only the first entry is read
+    }
+};
 
-template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
+template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 2>, IT> // 2-channel pixel
 {
-    const T a = m.at<T>(i1, j1);
-    const T b = m.at<T>(i2, j2);
-    return calcDist<T>(a,b);
-}
+    static inline Vec<ET, 2> f(IT* estimation)
+    {
+        Vec<ET, 2> res;
+        res[0] = saturate_cast<ET>(estimation[0]); // each channel is converted independently
+        res[1] = saturate_cast<ET>(estimation[1]);
+        return res;
+    }
+};
 
-template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
+template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 3>, IT> // 3-channel pixel
 {
-    return calcDist(a_down, b_down) - calcDist(a_up, b_up);
-}
+    static inline Vec<ET, 3> f(IT* estimation)
+    {
+        Vec<ET, 3> res;
+        res[0] = saturate_cast<ET>(estimation[0]); // each channel is converted independently
+        res[1] = saturate_cast<ET>(estimation[1]);
+        res[2] = saturate_cast<ET>(estimation[2]);
+        return res;
+    }
+};
 
-template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar  b_up, uchar b_down)
+template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 4>, IT> // 4-channel pixel
 {
-    int A = a_down - b_down;
-    int B = a_up - b_up;
-    return (A-B)*(A+B);
-}
+    static inline Vec<ET, 4> f(IT* estimation)
+    {
+        Vec<ET, 4> res;
+        res[0] = saturate_cast<ET>(estimation[0]); // each channel is converted independently
+        res[1] = saturate_cast<ET>(estimation[1]);
+        res[2] = saturate_cast<ET>(estimation[2]);
+        res[3] = saturate_cast<ET>(estimation[3]);
+        return res;
+    }
+};
 
-template <typename T> static inline void incWithWeight(int* estimation, int weight, T p);
-
-template <> inline void incWithWeight(int* estimation, int weight, uchar p)
+template <typename T, typename IT> static inline T saturateCastFromArray(IT* estimation)
 {
-    estimation[0] += weight * p;
-}
-
-template <> inline void incWithWeight(int* estimation, int weight, Vec2b p)
-{
-    estimation[0] += weight * p[0];
-    estimation[1] += weight * p[1];
-}
-
-template <> inline void incWithWeight(int* estimation, int weight, Vec3b p)
-{
-    estimation[0] += weight * p[0];
-    estimation[1] += weight * p[1];
-    estimation[2] += weight * p[2];
-}
-
-template <> inline void incWithWeight(int* estimation, int weight, int p)
-{
-    estimation[0] += weight * p;
-}
-
-template <> inline void incWithWeight(int* estimation, int weight, Vec2i p)
-{
-    estimation[0] += weight * p[0];
-    estimation[1] += weight * p[1];
-}
-
-template <> inline void incWithWeight(int* estimation, int weight, Vec3i p)
-{
-    estimation[0] += weight * p[0];
-    estimation[1] += weight * p[1];
-    estimation[2] += weight * p[2];
-}
-
-template <typename T> static inline T saturateCastFromArray(int* estimation);
-
-template <> inline uchar saturateCastFromArray(int* estimation)
-{
-    return saturate_cast<uchar>(estimation[0]);
-}
-
-template <> inline Vec2b saturateCastFromArray(int* estimation)
-{
-    Vec2b res;
-    res[0] = saturate_cast<uchar>(estimation[0]);
-    res[1] = saturate_cast<uchar>(estimation[1]);
-    return res;
-}
-
-template <> inline Vec3b saturateCastFromArray(int* estimation)
-{
-    Vec3b res;
-    res[0] = saturate_cast<uchar>(estimation[0]);
-    res[1] = saturate_cast<uchar>(estimation[1]);
-    res[2] = saturate_cast<uchar>(estimation[2]);
-    return res;
-}
-
-template <> inline int saturateCastFromArray(int* estimation)
-{
-    return estimation[0];
-}
-
-template <> inline Vec2i saturateCastFromArray(int* estimation)
-{
-    estimation[1] = 0;
-    return Vec2i(estimation);
-}
-
-template <> inline Vec3i saturateCastFromArray(int* estimation)
-{
-    return Vec3i(estimation);
+    return saturateCastFromArray_<T, IT>::f(estimation);
 }
 
 #endif
diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
index 1cdd8fa49..1c511f37b 100644
--- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp
@@ -28,12 +28,16 @@ static int divUp(int a, int b)
     return (a + b - 1) / b;
 }
 
-template <typename FT>
-static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
+template <typename FT, typename ST, typename WT>
+static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
+                                      int searchWindowSize, int templateWindowSize,
+                                      const FT *h, int hn, int cn, int normType,
                                       int & almostTemplateWindowSizeSqBinShift)
 {
-    const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
-    int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
+    const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
+        std::numeric_limits<ST>::max();
+    int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
+                                           std::numeric_limits<int>::max());
     int depth = DataType<FT>::depth;
     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 
@@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
     FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
 
     const FT WEIGHT_THRESHOLD = 1e-3f;
-    int maxDist = 255 * 255 * cn;
+    int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
+        std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
     int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
-    FT den = 1.0f / (h * h * cn);
+    FT den[4];
+    CV_Assert(hn > 0 && hn <= 4);
+    for (int i=0; i<hn; i++)
+        den[i] = 1.0f / (h[i] * h[i] * cn);
 
-    almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
+    almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
 
+    char buf[40];
     ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
-                  format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
-                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+                  format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
+                         " -D wlut_t=%s -D convert_wlut_t=%s%s%s",
+                         ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
+                         ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+                         normType == NORM_L1 ? " -D ABS" : ""));
     if (k.empty())
         return false;
 
     k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
-           almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
+           almostDist2ActualDistMultiplier, fixedPointMult,
+           ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
 
     size_t globalsize[1] = { almostMaxDist };
     return k.run(1, globalsize, NULL, false);
 }
 
-static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
-                                     int templateWindowSize, int searchWindowSize)
+static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
+                                     int templateWindowSize, int searchWindowSize, int normType)
 {
-    int type = _src.type(), cn = CV_MAT_CN(type);
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
     Size size = _src.size();
 
-    if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 )
+    if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
+                             (normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
         return false;
 
     int templateWindowHalfWize = templateWindowSize / 2;
@@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
     int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
     int almostTemplateWindowSizeSqBinShift = -1;
 
-    char cvt[2][40];
+    char buf[4][40];
     String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
-                         " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
+                         " -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
+                         " -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
+                         " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
                          " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
-                         " -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s",
-                         templateWindowSize, searchWindowSize, ocl::typeToStr(type),
-                         ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize,
-                         templateWindowHalfWize, searchWindowHalfSize,
-                         ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
-                         ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));
+                         " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
+                         templateWindowSize, searchWindowSize,
+                         ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
+                         ocl::typeToStr(CV_32SC(hn)),
+                         depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :
+                         format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
+                         depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :
+                         format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
+                         depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
+                         format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
+                         depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :
+                         format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
+                         BLOCK_COLS, BLOCK_ROWS,
+                         ctaSize, templateWindowHalfWize, searchWindowHalfSize,
+                         ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
+                         (int)((depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn)), // psz is printed with %d, so pass an int rather than size_t
+                         ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
+                         normType == NORM_L1 ? " -D ABS" : "");
 
     ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
     if (k.empty())
         return false;
 
     UMat almostDist2Weight;
-    if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
-                                   almostTemplateWindowSizeSqBinShift))
+    if ((depth == CV_8U &&
+         !ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
+                                                       searchWindowSize, templateWindowSize,
+                                                       h, hn, cn, normType,
+                                                       almostTemplateWindowSizeSqBinShift)) ||
+        (depth == CV_16U &&
+         !ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
+                                                          searchWindowSize, templateWindowSize,
+                                                          h, hn, cn, normType,
+                                                          almostTemplateWindowSizeSqBinShift)))
         return false;
     CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
 
     UMat srcex;
     int borderSize = searchWindowHalfSize + templateWindowHalfWize;
-    copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
+    if (cn == 3) { // 3-channel input is stored in a 4-channel bordered buffer
+        srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
+        UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height)); // interior view of srcex, same size as the input
+        int from_to[] = { 0,0, 1,1, 2,2 }; // channels 0..2 only; mixChannels does not write channel 3
+        mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);
+        copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,
+                       BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
+    }
+    else // other channel counts: pad _src directly into srcex
+        copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
 
     _dst.create(size, type);
-    UMat dst = _dst.getUMat();
+    UMat dst;
+    if (cn == 3)
+        dst.create(size, CV_MAKE_TYPE(depth, 4)); // separate 4-channel buffer; channels 0..2 are copied into _dst after the kernel runs
+    else
+        dst = _dst.getUMat(); // use the caller's output matrix directly
 
     int searchWindowSizeSq = searchWindowSize * searchWindowSize;
     Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
@@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
            ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
 
     size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
-    return k.run(2, globalsize, localsize, false);
+    if (!k.run(2, globalsize, localsize, false)) return false; // report failure if the kernel could not be run
+
+    if (cn == 3) { // dst is 4-channel here: copy channels 0..2 into the 3-channel _dst
+        int from_to[] = { 0,0, 1,1, 2,2 };
+        mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
+    }
+
+    return true;
 }
 
 static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
index 191a67127..3f13f400d 100644
--- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
@@ -50,14 +50,14 @@
 
 using namespace cv;
 
-template <typename T>
+template <typename T, typename IT, typename UIT, typename D, typename WT>
 struct FastNlMeansMultiDenoisingInvoker :
         ParallelLoopBody
 {
 public:
     FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
                                      int temporalWindowSize, Mat& dst, int template_window_size,
-                                     int search_window_size, const float h);
+                                     int search_window_size, const float *h);
 
     void operator() (const Range& range) const;
 
@@ -81,9 +81,9 @@ private:
     int search_window_half_size_;
     int temporal_window_half_size_;
 
-    int fixed_point_mult_;
+    typename pixelInfo<WT>::sampleType fixed_point_mult_;
     int almost_template_window_size_sq_bin_shift;
-    std::vector<int> almost_dist2weight;
+    std::vector<WT> almost_dist2weight;
 
     void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
                                           Array4d<int>& col_dist_sums,
@@ -94,19 +94,19 @@ private:
                                           Array4d<int>& up_col_dist_sums) const;
 };
 
-template <class T>
-FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
     const std::vector<Mat>& srcImgs,
     int imgToDenoiseIndex,
     int temporalWindowSize,
     cv::Mat& dst,
     int template_window_size,
     int search_window_size,
-    const float h) :
+    const float *h) :
         dst_(dst), extended_srcs_(srcImgs.size())
 {
     CV_Assert(srcImgs.size() > 0);
-    CV_Assert(srcImgs[0].channels() == sizeof(T));
+    CV_Assert(srcImgs[0].channels() == pixelInfo<T>::channels);
 
     rows_ = srcImgs[0].rows;
     cols_ = srcImgs[0].cols;
@@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
             border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
 
     main_extended_src_ = extended_srcs_[temporal_window_half_size_];
-    const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
-    fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
+    const IT max_estimate_sum_value =
+        (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
+    fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
+                                          pixelInfo<WT>::sampleMax());
 
     // precalc weight for every possible l2 dist between blocks
     // additional optimization of precalced weights to replace division(averaging) by binary shift
@@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
     int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
     double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
 
-    int max_dist = 255 * 255 * sizeof(T);
-    int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
+    int max_dist = D::template maxDist<T>();
+    int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
     almost_dist2weight.resize(almost_max_dist);
 
-    const double WEIGHT_THRESHOLD = 0.001;
     for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
     {
         double dist = almost_dist * almost_dist2actual_dist_multiplier;
-        int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
-
-        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
-            weight = 0;
-
-        almost_dist2weight[almost_dist] = weight;
+        almost_dist2weight[almost_dist] =
+            D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
     }
-    CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
 
     // additional optimization init end
     if (dst_.empty())
         dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
 }
 
-template <class T>
-void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
 {
     int row_from = range.start;
     int row_to = range.end - 1;
@@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
                                 dist_sums_row[x] -= col_dist_sums_row[x];
 
                                 col_dist_sums_row[x] = up_col_dist_sums_row[x] +
-                                    calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
+                                    D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
 
                                 dist_sums_row[x] += col_dist_sums_row[x];
                                 up_col_dist_sums_row[x] = col_dist_sums_row[x];
@@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
             }
 
             // calc weights
-            int weights_sum = 0;
-
-            int estimation[3];
-            for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
+            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
+            for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
                 estimation[channel_num] = 0;
+            for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
+                weights_sum[channel_num] = 0;
 
             for (int d = 0; d < temporal_window_size_; d++)
             {
@@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
                     {
                         int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
 
-                        int weight = almost_dist2weight[almostAvgDist];
-                        weights_sum += weight;
-
+                        WT weight =  almost_dist2weight[almostAvgDist];
                         T p = cur_row_ptr[border_size_ + search_window_x + x];
-                        incWithWeight(estimation, weight, p);
+                        incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
                     }
                 }
             }
 
-            for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
-                estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum;
-
-            dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
-
+            divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
+                                                                                      weights_sum);
+            dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
         }
     }
 }
 
-template <class T>
-inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
         int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
 {
     int j = 0;
@@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
                 {
                     for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
                     {
-                        int dist = calcDist<T>(
+                        int dist = D::template calcDist<T>(
                                     main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
                                     cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
 
@@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
     }
 }
 
-template <class T>
-inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
     int i, int j, int first_col_num, Array3d<int>& dist_sums,
     Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
 {
@@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
                 int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
                 for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
                 {
-                    *col_dist_sums_ptr += calcDist<T>(
+                    *col_dist_sums_ptr += D::template calcDist<T>(
                                 main_extended_src_.at<T>(ay + ty, ax),
                                 cur_extended_src.at<T>(by + ty, bx));
                 }
diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl
index af3fb1f9b..879665f48 100644
--- a/modules/photo/src/opencl/nlmeans.cl
+++ b/modules/photo/src/opencl/nlmeans.cl
@@ -20,21 +20,23 @@
 
 #ifdef OP_CALC_WEIGHTS
 
-__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist,
+__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist,
                                     FT almostDist2ActualDistMultiplier, int fixedPointMult,
-                                    FT den, FT WEIGHT_THRESHOLD)
+                                    w_t den, FT WEIGHT_THRESHOLD)
 {
     int almostDist = get_global_id(0);
 
     if (almostDist < almostMaxDist)
     {
         FT dist = almostDist * almostDist2ActualDistMultiplier;
-        int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den));
-
-        if (weight < WEIGHT_THRESHOLD * fixedPointMult)
-            weight = 0;
-
-        almostDist2Weight[almostDist] = weight;
+#ifdef ABS
+        w_t w = exp((w_t)(-dist*dist) * den);
+#else
+        w_t w = exp((w_t)(-dist) * den);
+#endif
+        wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w));
+        almostDist2Weight[almostDist] =
+            weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? (wlut_t)0 : weight;
     }
 }
 
@@ -44,21 +46,35 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
 
 #define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)
 
-inline int calcDist(uchar_t a, uchar_t b)
+inline int calcDist(pixel_t a, pixel_t b)
 {
+#ifdef ABS
+    int_t retval = convert_int_t(abs_diff(a, b));
+#else
     int_t diff = convert_int_t(a) - convert_int_t(b);
     int_t retval = diff * diff;
+#endif
 
 #if cn == 1
     return retval;
 #elif cn == 2
     return retval.x + retval.y;
+#elif cn == 3
+    return retval.x + retval.y + retval.z;
+#elif cn == 4
+    return retval.x + retval.y + retval.z + retval.w;
 #else
-#error "cn should be either 1 or 2"
+#error "cn should be either 1, 2, 3 or 4"
 #endif
 }
 
-inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t)
+#ifdef ABS
+inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
+{
+    return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);
+}
+#else
+inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
 {
     int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
     int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
@@ -68,10 +84,15 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v
     return retval;
 #elif cn == 2
     return retval.x + retval.y;
+#elif cn == 3
+    return retval.x + retval.y + retval.z;
+#elif cn == 4
+    return retval.x + retval.y + retval.z + retval.w;
 #else
-#error "cn should be either 1 or 2"
+#error "cn should be either 1, 2, 3 or 4"
 #endif
 }
+#endif
 
 #define COND if (x == 0 && y == 0)
 
@@ -87,9 +108,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
     {
         int dist = 0, value;
 
-        __global const uchar_t * src_template = (__global const uchar_t *)(src +
-            mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
-        __global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
+        __global const pixel_t * src_template = (__global const pixel_t *)(src +
+            mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
+        __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
         __global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;
 
         #pragma unroll
@@ -107,8 +128,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
                 dist += value;
             }
 
-            src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
-            src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
+            src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
+            src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
         }
 
         #pragma unroll
@@ -130,9 +151,9 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
 
     for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
     {
-        __global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
-        __global const uchar_t * src_template = (__global const uchar_t *)(src +
-            mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
+        __global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
+        __global const pixel_t * src_template = (__global const pixel_t *)(src +
+            mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
         __global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;
 
         int col_dist = 0;
@@ -142,8 +163,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
         {
             col_dist += calcDist(src_current[0], src_template[0]);
 
-            src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
-            src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
+            src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
+            src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
         }
 
         dists[i] += col_dist - col_dists_current[first];
@@ -160,8 +181,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
     int sy_up = y - TEMPLATE_SIZE2 - 1;
     int sy_down = y + TEMPLATE_SIZE2;
 
-    uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset)));
-    uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset)));
+    pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset)));
+    pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset)));
 
     sx -= SEARCH_SIZE2;
     sy_up -= SEARCH_SIZE2;
@@ -171,8 +192,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
     {
         int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;
 
-        uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset)));
-        uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset)));
+        pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset)));
+        pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset)));
 
         __global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
         __global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
@@ -186,24 +207,25 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
 }
 
 inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
-                           __local int * dists, __global const int * almostDist2Weight,
+                           __local int * dists, __global const wlut_t * almostDist2Weight,
                            __global uchar * dst, int dst_step, int dst_offset,
-                           int y, int x, int id, __local int * weights_local,
-                           __local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
+                           int y, int x, int id, __local weight_t * weights_local,
+                           __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
 {
-    int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0;
-    int_t weighted_sum = (int_t)(0);
+    int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
+    weight_t weights = (weight_t)0;
+    sum_t weighted_sum = (sum_t)0;
 
     for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
     {
-        int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset));
-        int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index));
+        int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset));
+        sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index));
 
         int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;
-        int weight = almostDist2Weight[almostAvgDist];
+        weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]);
 
         weights += weight;
-        weighted_sum += (int_t)(weight) * src_value;
+        weighted_sum += (sum_t)weight * src_value;
     }
 
     weights_local[id] = weights;
@@ -223,26 +245,27 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
 
     if (id == 0)
     {
-        int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
-        int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
+        int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset));
+        sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
             weighted_sum_local[2] + weighted_sum_local[3];
-        int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
+        weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
 
-        *(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0));
+        *(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0);
     }
 }
 
 __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
                                    __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
-                                   __global const int * almostDist2Weight, __global uchar * buffer,
+                                   __global const wlut_t * almostDist2Weight, __global uchar * buffer,
                                    int almostTemplateWindowSizeSqBinShift)
 {
     int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
     int block_y = get_group_id(1);
     int id = get_local_id(0), first;
 
-    __local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE];
-    __local int_t weighted_sum[CTA_SIZE];
+    __local int dists[SEARCH_SIZE_SQ];
+    __local weight_t weights[CTA_SIZE];
+    __local sum_t weighted_sum[CTA_SIZE];
 
     int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);
     int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);
diff --git a/modules/photo/test/ocl/test_denoising.cpp b/modules/photo/test/ocl/test_denoising.cpp
index cb2d74f85..f749564c6 100644
--- a/modules/photo/test/ocl/test_denoising.cpp
+++ b/modules/photo/test/ocl/test_denoising.cpp
@@ -13,11 +13,11 @@
 namespace cvtest {
 namespace ocl {
 
-PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
+PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool)
 {
-    int cn, templateWindowSize, searchWindowSize;
-    float h;
-    bool use_roi;
+    int cn, normType, templateWindowSize, searchWindowSize;
+    std::vector<float> h;
+    bool use_roi, use_image;
 
     TEST_DECLARE_INPUT_PARAMETER(src);
     TEST_DECLARE_OUTPUT_PARAMETER(dst);
@@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
     virtual void SetUp()
     {
         cn = GET_PARAM(0);
-        use_roi = GET_PARAM(1);
+        normType = GET_PARAM(1);
+        use_roi = GET_PARAM(2);
+        use_image = GET_PARAM(3);
 
         templateWindowSize = 7;
         searchWindowSize = 21;
-        h = 3.0f;
+
+        h.resize(cn);
+        for (int i=0; i<cn; i++)
+            h[i] = 3.0f + 0.5f*i;
     }
 
     virtual void generateTestData()
     {
+        const int type = CV_8UC(cn);
         Mat image;
-        if (cn == 1)
-        {
-            image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
+
+        if (use_image) {
+            image = readImage("denoising/lena_noised_gaussian_sigma=10.png",
+                                  cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
             ASSERT_FALSE(image.empty());
         }
 
-        const int type = CV_8UC(cn);
-
-        Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
+        Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE);
         Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
         randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
-        if (cn == 1)
-            image.copyTo(src_roi);
+        if (use_image) {
+            ASSERT_TRUE(cn > 0 && cn <= 4);
+            if (cn == 2) {
+                int from_to[] = { 0,0, 1,1 };
+                src_roi.create(roiSize, type);
+                mixChannels(&image, 1, &src_roi, 1, from_to, 2);
+            }
+            else if (cn == 4) {
+                int from_to[] = { 0,0, 1,1, 2,2, 1,3};
+                src_roi.create(roiSize, type);
+                mixChannels(&image, 1, &src_roi, 1, from_to, 4);
+            }
+            else image.copyTo(src_roi);
+        }
 
         Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
         randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
@@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
     {
         generateTestData();
 
-        OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
-        OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
+        OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
+        OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
+
+        OCL_EXPECT_MATS_NEAR(dst, 1);
+    }
+}
+
+typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep;
+
+OCL_TEST_P(FastNlMeansDenoising_hsep, Mat)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType));
+        OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType));
 
         OCL_EXPECT_MATS_NEAR(dst, 1);
     }
@@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat)
     {
         generateTestData();
 
-        OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
-        OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));
+        OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
+        OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
 
         OCL_EXPECT_MATS_NEAR(dst, 1);
     }
 }
 
-OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising,
+                            Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
+                                    Bool(), Values(true)));
+OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep,
+                            Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
+                                    Bool(), Values(true)));
+OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored,
+                            Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false)));
 
 } } // namespace cvtest::ocl
 
diff --git a/modules/python/common.cmake b/modules/python/common.cmake
index b09b72192..57439809b 100644
--- a/modules/python/common.cmake
+++ b/modules/python/common.cmake
@@ -50,7 +50,11 @@ ocv_add_library(${the_module} SHARED ${PYTHON_SOURCE_DIR}/src2/cv2.cpp ${cv2_gen
 if(PYTHON_DEBUG_LIBRARIES AND NOT PYTHON_LIBRARIES MATCHES "optimized.*debug")
   ocv_target_link_libraries(${the_module} debug ${PYTHON_DEBUG_LIBRARIES} optimized ${PYTHON_LIBRARIES})
 else()
-  ocv_target_link_libraries(${the_module} ${PYTHON_LIBRARIES})
+  if(APPLE)
+    set_target_properties(${the_module} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  else()
+    ocv_target_link_libraries(${the_module} ${PYTHON_LIBRARIES})
+  endif()
 endif()
 ocv_target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS})
 
diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py
index 55a79484c..43adfbfc4 100755
--- a/modules/python/src2/gen2.py
+++ b/modules/python/src2/gen2.py
@@ -255,20 +255,24 @@ class ClassInfo(object):
         self.methods = {}
         self.props = []
         self.consts = {}
+        self.base = None
         customname = False
 
         if decl:
-            self.bases = decl[1].split()[1:]
-            if len(self.bases) > 1:
+            bases = decl[1].split()[1:]
+            if len(bases) > 1:
                 print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,))
-                print("      Bases: ", " ".join(self.bases))
+                print("      Bases: ", " ".join(bases))
                 print("      Only the first base class will be used")
-                self.bases = [self.bases[0].strip(",")]
                 #return sys.exit(-1)
-            if self.bases and self.bases[0].startswith("cv::"):
-                self.bases[0] = self.bases[0][4:]
-            if self.bases and self.bases[0] == "Algorithm":
-                self.isalgorithm = True
+            elif len(bases) == 1:
+                self.base = bases[0].strip(",")
+                if self.base.startswith("cv::"):
+                    self.base = self.base[4:]
+                if self.base == "Algorithm":
+                    self.isalgorithm = True
+                self.base = self.base.replace("::", "_")
+
             for m in decl[2]:
                 if m.startswith("="):
                     self.wname = m[1:]
@@ -285,8 +289,8 @@ class ClassInfo(object):
     def gen_map_code(self, all_classes):
         code = "static bool pyopencv_to(PyObject* src, %s& dst, const char* name)\n{\n    PyObject* tmp;\n    bool ok;\n" % (self.cname)
         code += "".join([gen_template_set_prop_from_map.substitute(propname=p.name,proptype=p.tp) for p in self.props])
-        if self.bases:
-            code += "\n    return pyopencv_to(src, (%s&)dst, name);\n}\n" % all_classes[self.bases[0].replace("::", "_")].cname
+        if self.base:
+            code += "\n    return pyopencv_to(src, (%s&)dst, name);\n}\n" % all_classes[self.base].cname
         else:
             code += "\n    return true;\n}\n"
         return code
@@ -330,8 +334,8 @@ class ClassInfo(object):
             methods_inits.write(m.get_tab_entry())
 
         baseptr = "NULL"
-        if self.bases and self.bases[0] in all_classes:
-            baseptr = "&pyopencv_" + all_classes[self.bases[0]].name + "_Type"
+        if self.base and self.base in all_classes:
+            baseptr = "&pyopencv_" + all_classes[self.base].name + "_Type"
 
         code = gen_template_type_impl.substitute(name=self.name, wname=self.wname, cname=self.cname,
             getset_code=getset_code.getvalue(), getset_inits=getset_inits.getvalue(),
@@ -753,17 +757,17 @@ class PythonWrapperGenerator(object):
             sys.exit(-1)
         self.classes[classinfo.name] = classinfo
 
-        if classinfo.bases:
-            chunks = classinfo.bases[0].split('::')
+        if classinfo.base:
+            chunks = classinfo.base.split('_')
             base = '_'.join(chunks)
             while base not in self.classes and len(chunks)>1:
                 del chunks[-2]
                 base = '_'.join(chunks)
             if base not in self.classes:
                 print("Generator error: unable to resolve base %s for %s"
-                    % (classinfo.bases[0], classinfo.name))
+                    % (classinfo.base, classinfo.name))
                 sys.exit(-1)
-            classinfo.bases[0] = "::".join(chunks)
+            classinfo.base = base
             classinfo.isalgorithm |= self.classes[base].isalgorithm
 
     def split_decl_name(self, name):
diff --git a/modules/python/test/test.py b/modules/python/test/test.py
index a0f0daa56..093979aba 100644
--- a/modules/python/test/test.py
+++ b/modules/python/test/test.py
@@ -145,6 +145,16 @@ class Hackathon244Tests(NewOpenCVTests):
         self.check_close_pairs(mc, mc0, 5)
         self.assertLessEqual(abs(mr - mr0), 5)
 
+    def test_inheritance(self):
+        bm = cv2.StereoBM_create()
+        bm.getPreFilterCap() # from StereoBM
+        bm.getBlockSize() # from StereoMatcher
+
+        boost = cv2.ml.Boost_create()
+        boost.getBoostType() # from ml::Boost
+        boost.getMaxDepth() # from ml::DTrees
+        boost.isClassifier() # from ml::StatModel
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='run OpenCV python tests')
     parser.add_argument('--repo', help='use sample image files from local git repository (path to folder), '
diff --git a/modules/superres/include/opencv2/superres.hpp b/modules/superres/include/opencv2/superres.hpp
index acc067302..dec8e4eda 100644
--- a/modules/superres/include/opencv2/superres.hpp
+++ b/modules/superres/include/opencv2/superres.hpp
@@ -105,34 +105,64 @@ namespace cv
             virtual void collectGarbage();
 
             //! @brief Scale factor
-            CV_PURE_PROPERTY(int, Scale)
+            /** @see setScale */
+            virtual int getScale() const = 0;
+            /** @copybrief getScale @see getScale */
+            virtual void setScale(int val) = 0;
 
             //! @brief Iterations count
-            CV_PURE_PROPERTY(int, Iterations)
+            /** @see setIterations */
+            virtual int getIterations() const = 0;
+            /** @copybrief getIterations @see getIterations */
+            virtual void setIterations(int val) = 0;
 
             //! @brief Asymptotic value of steepest descent method
-            CV_PURE_PROPERTY(double, Tau)
+            /** @see setTau */
+            virtual double getTau() const = 0;
+            /** @copybrief getTau @see getTau */
+            virtual void setTau(double val) = 0;
 
             //! @brief Weight parameter to balance data term and smoothness term
-            CV_PURE_PROPERTY(double, Labmda)
+            /** @see setLabmda */
+            virtual double getLabmda() const = 0;
+            /** @copybrief getLabmda @see getLabmda */
+            virtual void setLabmda(double val) = 0;
 
             //! @brief Parameter of spacial distribution in Bilateral-TV
-            CV_PURE_PROPERTY(double, Alpha)
+            /** @see setAlpha */
+            virtual double getAlpha() const = 0;
+            /** @copybrief getAlpha @see getAlpha */
+            virtual void setAlpha(double val) = 0;
 
             //! @brief Kernel size of Bilateral-TV filter
-            CV_PURE_PROPERTY(int, KernelSize)
+            /** @see setKernelSize */
+            virtual int getKernelSize() const = 0;
+            /** @copybrief getKernelSize @see getKernelSize */
+            virtual void setKernelSize(int val) = 0;
 
             //! @brief Gaussian blur kernel size
-            CV_PURE_PROPERTY(int, BlurKernelSize)
+            /** @see setBlurKernelSize */
+            virtual int getBlurKernelSize() const = 0;
+            /** @copybrief getBlurKernelSize @see getBlurKernelSize */
+            virtual void setBlurKernelSize(int val) = 0;
 
             //! @brief Gaussian blur sigma
-            CV_PURE_PROPERTY(double, BlurSigma)
+            /** @see setBlurSigma */
+            virtual double getBlurSigma() const = 0;
+            /** @copybrief getBlurSigma @see getBlurSigma */
+            virtual void setBlurSigma(double val) = 0;
 
             //! @brief Radius of the temporal search area
-            CV_PURE_PROPERTY(int, TemporalAreaRadius)
+            /** @see setTemporalAreaRadius */
+            virtual int getTemporalAreaRadius() const = 0;
+            /** @copybrief getTemporalAreaRadius @see getTemporalAreaRadius */
+            virtual void setTemporalAreaRadius(int val) = 0;
 
             //! @brief Dense optical flow algorithm
-            CV_PURE_PROPERTY_S(Ptr<cv::superres::DenseOpticalFlowExt>, OpticalFlow)
+            /** @see setOpticalFlow */
+            virtual Ptr<cv::superres::DenseOpticalFlowExt> getOpticalFlow() const = 0;
+            /** @copybrief getOpticalFlow @see getOpticalFlow */
+            virtual void setOpticalFlow(const Ptr<cv::superres::DenseOpticalFlowExt> &val) = 0;
 
         protected:
             SuperResolution();
diff --git a/modules/superres/include/opencv2/superres/optical_flow.hpp b/modules/superres/include/opencv2/superres/optical_flow.hpp
index add606c02..d2f29a39b 100644
--- a/modules/superres/include/opencv2/superres/optical_flow.hpp
+++ b/modules/superres/include/opencv2/superres/optical_flow.hpp
@@ -64,13 +64,34 @@ namespace cv
         class CV_EXPORTS FarnebackOpticalFlow : public virtual DenseOpticalFlowExt
         {
         public:
-            CV_PURE_PROPERTY(double, PyrScale)
-            CV_PURE_PROPERTY(int, LevelsNumber)
-            CV_PURE_PROPERTY(int, WindowSize)
-            CV_PURE_PROPERTY(int, Iterations)
-            CV_PURE_PROPERTY(int, PolyN)
-            CV_PURE_PROPERTY(double, PolySigma)
-            CV_PURE_PROPERTY(int, Flags)
+            /** @see setPyrScale */
+            virtual double getPyrScale() const = 0;
+            /** @copybrief getPyrScale @see getPyrScale */
+            virtual void setPyrScale(double val) = 0;
+            /** @see setLevelsNumber */
+            virtual int getLevelsNumber() const = 0;
+            /** @copybrief getLevelsNumber @see getLevelsNumber */
+            virtual void setLevelsNumber(int val) = 0;
+            /** @see setWindowSize */
+            virtual int getWindowSize() const = 0;
+            /** @copybrief getWindowSize @see getWindowSize */
+            virtual void setWindowSize(int val) = 0;
+            /** @see setIterations */
+            virtual int getIterations() const = 0;
+            /** @copybrief getIterations @see getIterations */
+            virtual void setIterations(int val) = 0;
+            /** @see setPolyN */
+            virtual int getPolyN() const = 0;
+            /** @copybrief getPolyN @see getPolyN */
+            virtual void setPolyN(int val) = 0;
+            /** @see setPolySigma */
+            virtual double getPolySigma() const = 0;
+            /** @copybrief getPolySigma @see getPolySigma */
+            virtual void setPolySigma(double val) = 0;
+            /** @see setFlags */
+            virtual int getFlags() const = 0;
+            /** @copybrief getFlags @see getFlags */
+            virtual void setFlags(int val) = 0;
         };
         CV_EXPORTS Ptr<FarnebackOpticalFlow> createOptFlow_Farneback();
         CV_EXPORTS Ptr<FarnebackOpticalFlow> createOptFlow_Farneback_CUDA();
@@ -82,14 +103,38 @@ namespace cv
         class CV_EXPORTS DualTVL1OpticalFlow : public virtual DenseOpticalFlowExt
         {
         public:
-            CV_PURE_PROPERTY(double, Tau)
-            CV_PURE_PROPERTY(double, Lambda)
-            CV_PURE_PROPERTY(double, Theta)
-            CV_PURE_PROPERTY(int, ScalesNumber)
-            CV_PURE_PROPERTY(int, WarpingsNumber)
-            CV_PURE_PROPERTY(double, Epsilon)
-            CV_PURE_PROPERTY(int, Iterations)
-            CV_PURE_PROPERTY(bool, UseInitialFlow)
+            /** @see setTau */
+            virtual double getTau() const = 0;
+            /** @copybrief getTau @see getTau */
+            virtual void setTau(double val) = 0;
+            /** @see setLambda */
+            virtual double getLambda() const = 0;
+            /** @copybrief getLambda @see getLambda */
+            virtual void setLambda(double val) = 0;
+            /** @see setTheta */
+            virtual double getTheta() const = 0;
+            /** @copybrief getTheta @see getTheta */
+            virtual void setTheta(double val) = 0;
+            /** @see setScalesNumber */
+            virtual int getScalesNumber() const = 0;
+            /** @copybrief getScalesNumber @see getScalesNumber */
+            virtual void setScalesNumber(int val) = 0;
+            /** @see setWarpingsNumber */
+            virtual int getWarpingsNumber() const = 0;
+            /** @copybrief getWarpingsNumber @see getWarpingsNumber */
+            virtual void setWarpingsNumber(int val) = 0;
+            /** @see setEpsilon */
+            virtual double getEpsilon() const = 0;
+            /** @copybrief getEpsilon @see getEpsilon */
+            virtual void setEpsilon(double val) = 0;
+            /** @see setIterations */
+            virtual int getIterations() const = 0;
+            /** @copybrief getIterations @see getIterations */
+            virtual void setIterations(int val) = 0;
+            /** @see setUseInitialFlow */
+            virtual bool getUseInitialFlow() const = 0;
+            /** @copybrief getUseInitialFlow @see getUseInitialFlow */
+            virtual void setUseInitialFlow(bool val) = 0;
         };
         CV_EXPORTS Ptr<DualTVL1OpticalFlow> createOptFlow_DualTVL1();
         CV_EXPORTS Ptr<DualTVL1OpticalFlow> createOptFlow_DualTVL1_CUDA();
@@ -99,17 +144,35 @@ namespace cv
         {
         public:
             //! @brief Flow smoothness
-            CV_PURE_PROPERTY(double, Alpha)
+            /** @see setAlpha */
+            virtual double getAlpha() const = 0;
+            /** @copybrief getAlpha @see getAlpha */
+            virtual void setAlpha(double val) = 0;
             //! @brief Gradient constancy importance
-            CV_PURE_PROPERTY(double, Gamma)
+            /** @see setGamma */
+            virtual double getGamma() const = 0;
+            /** @copybrief getGamma @see getGamma */
+            virtual void setGamma(double val) = 0;
             //! @brief Pyramid scale factor
-            CV_PURE_PROPERTY(double, ScaleFactor)
+            /** @see setScaleFactor */
+            virtual double getScaleFactor() const = 0;
+            /** @copybrief getScaleFactor @see getScaleFactor */
+            virtual void setScaleFactor(double val) = 0;
             //! @brief Number of lagged non-linearity iterations (inner loop)
-            CV_PURE_PROPERTY(int, InnerIterations)
+            /** @see setInnerIterations */
+            virtual int getInnerIterations() const = 0;
+            /** @copybrief getInnerIterations @see getInnerIterations */
+            virtual void setInnerIterations(int val) = 0;
             //! @brief Number of warping iterations (number of pyramid levels)
-            CV_PURE_PROPERTY(int, OuterIterations)
+            /** @see setOuterIterations */
+            virtual int getOuterIterations() const = 0;
+            /** @copybrief getOuterIterations @see getOuterIterations */
+            virtual void setOuterIterations(int val) = 0;
             //! @brief Number of linear system solver iterations
-            CV_PURE_PROPERTY(int, SolverIterations)
+            /** @see setSolverIterations */
+            virtual int getSolverIterations() const = 0;
+            /** @copybrief getSolverIterations @see getSolverIterations */
+            virtual void setSolverIterations(int val) = 0;
         };
         CV_EXPORTS Ptr<BroxOpticalFlow> createOptFlow_Brox_CUDA();
 
@@ -117,9 +180,18 @@ namespace cv
         class PyrLKOpticalFlow : public virtual DenseOpticalFlowExt
         {
         public:
-            CV_PURE_PROPERTY(int, WindowSize)
-            CV_PURE_PROPERTY(int, MaxLevel)
-            CV_PURE_PROPERTY(int, Iterations)
+            /** @see setWindowSize */
+            virtual int getWindowSize() const = 0;
+            /** @copybrief getWindowSize @see getWindowSize */
+            virtual void setWindowSize(int val) = 0;
+            /** @see setMaxLevel */
+            virtual int getMaxLevel() const = 0;
+            /** @copybrief getMaxLevel @see getMaxLevel */
+            virtual void setMaxLevel(int val) = 0;
+            /** @see setIterations */
+            virtual int getIterations() const = 0;
+            /** @copybrief getIterations @see getIterations */
+            virtual void setIterations(int val) = 0;
         };
         CV_EXPORTS Ptr<PyrLKOpticalFlow> createOptFlow_PyrLK_CUDA();
 
diff --git a/modules/ts/misc/run.py b/modules/ts/misc/run.py
index 19ab2ab7f..f25922d94 100755
--- a/modules/ts/misc/run.py
+++ b/modules/ts/misc/run.py
@@ -850,12 +850,51 @@ def getRunArgs(args):
             path = npath
     return run_args
 
+if hostos == "nt":
+    def moveTests(instance, destination):
+        """Copy the test binaries and CMakeCache.txt of `instance` into `destination`,
+        rewrite EXECUTABLE_OUTPUT_PATH in the copied cache to the new "bin" folder, then exit."""
+        src = os.path.dirname(instance.tests_dir)
+        # new binaries path
+        newBinPath = os.path.join(destination, "bin")
+        newCachePath = os.path.join(destination, "CMakeCache.txt")
+
+        def fail(message, e):
+            # not every exception carries a usable errno; never exit with 0 on failure
+            print message, str(e)
+            exit(getattr(e, "errno", None) or 1)
+
+        try:
+            # copy binaries and CMakeCache.txt to the specified destination
+            shutil.copytree(src, newBinPath)
+            shutil.copy(os.path.join(instance.path, "CMakeCache.txt"), newCachePath)
+        except Exception, e:
+            fail("Copying error occurred:", e)
+
+        try:
+            with open(newCachePath, "r") as cachefile:
+                cachedata = cachefile.read()
+            oldBinPath = re.search("EXECUTABLE_OUTPUT_PATH:PATH=(.+)", cachedata).group(1)
+            # literal replacement (the old path may hold regex metacharacters); slashes fixed for nt
+            cachedata = cachedata.replace(oldBinPath, newBinPath.replace("\\", "/"))
+        except Exception, e:
+            fail("Reading error occurred:", e)
+
+        try:
+            with open(newCachePath, "w") as cachefile:
+                cachefile.write(cachedata)
+        except Exception, e:
+            fail("Writing error occurred:", e)
+        exit()
+
 if __name__ == "__main__":
     test_args = [a for a in sys.argv if a.startswith("--perf_") or a.startswith("--gtest_")]
     argv =      [a for a in sys.argv if not(a.startswith("--perf_") or a.startswith("--gtest_"))]
 
-    parser = OptionParser()
+    parser = OptionParser(usage="run.py [options] [build_path]", description="Note: build_path is required if running not from CMake build directory")
     parser.add_option("-t", "--tests", dest="tests", help="comma-separated list of modules to test", metavar="SUITS", default="")
+    if hostos == "nt":
+        parser.add_option("-m", "--move_tests", dest="move", help="location to move current tests build", metavar="PATH", default="")
     parser.add_option("-w", "--cwd", dest="cwd", help="working directory for tests", metavar="PATH", default=".")
     parser.add_option("-a", "--accuracy", dest="accuracy", help="look for accuracy tests instead of performance tests", action="store_true", default=False)
     parser.add_option("-l", "--longname", dest="useLongNames", action="store_true", help="generate log files with long names", default=False)
@@ -880,6 +919,7 @@ if __name__ == "__main__":
 
     if len(run_args) == 0:
         print >> sys.stderr, "Usage:", os.path.basename(sys.argv[0]), "[options] [build_path]"
+        print >> sys.stderr, "Please specify build_path or run script from CMake build directory"
         exit(1)
 
     options.android_env = {}
@@ -906,6 +946,10 @@ if __name__ == "__main__":
     test_list = []
     for path in run_args:
         suite = TestSuite(options, path)
+
+        if hostos == "nt":
+            if(options.move):
+                moveTests(suite, options.move)
         #print vars(suite),"\n"
         if options.list:
             test_list.extend(suite.tests)
diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp
index 90be72ea2..718d76523 100644
--- a/modules/video/include/opencv2/video/tracking.hpp
+++ b/modules/video/include/opencv2/video/tracking.hpp
@@ -441,29 +441,65 @@ class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow
 {
 public:
     //! @brief Time step of the numerical scheme
-    CV_PURE_PROPERTY(double, Tau)
+    /** @see setTau */
+    virtual double getTau() const = 0;
+    /** @copybrief getTau @see getTau */
+    virtual void setTau(double val) = 0;
     //! @brief Weight parameter for the data term, attachment parameter
-    CV_PURE_PROPERTY(double, Lambda)
+    /** @see setLambda */
+    virtual double getLambda() const = 0;
+    /** @copybrief getLambda @see getLambda */
+    virtual void setLambda(double val) = 0;
     //! @brief Weight parameter for (u - v)^2, tightness parameter
-    CV_PURE_PROPERTY(double, Theta)
+    /** @see setTheta */
+    virtual double getTheta() const = 0;
+    /** @copybrief getTheta @see getTheta */
+    virtual void setTheta(double val) = 0;
     //! @brief coefficient for additional illumination variation term
-    CV_PURE_PROPERTY(double, Gamma)
+    /** @see setGamma */
+    virtual double getGamma() const = 0;
+    /** @copybrief getGamma @see getGamma */
+    virtual void setGamma(double val) = 0;
     //! @brief Number of scales used to create the pyramid of images
-    CV_PURE_PROPERTY(int, ScalesNumber)
+    /** @see setScalesNumber */
+    virtual int getScalesNumber() const = 0;
+    /** @copybrief getScalesNumber @see getScalesNumber */
+    virtual void setScalesNumber(int val) = 0;
     //! @brief Number of warpings per scale
-    CV_PURE_PROPERTY(int, WarpingsNumber)
+    /** @see setWarpingsNumber */
+    virtual int getWarpingsNumber() const = 0;
+    /** @copybrief getWarpingsNumber @see getWarpingsNumber */
+    virtual void setWarpingsNumber(int val) = 0;
     //! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time
-    CV_PURE_PROPERTY(double, Epsilon)
+    /** @see setEpsilon */
+    virtual double getEpsilon() const = 0;
+    /** @copybrief getEpsilon @see getEpsilon */
+    virtual void setEpsilon(double val) = 0;
     //! @brief Inner iterations (between outlier filtering) used in the numerical scheme
-    CV_PURE_PROPERTY(int, InnerIterations)
+    /** @see setInnerIterations */
+    virtual int getInnerIterations() const = 0;
+    /** @copybrief getInnerIterations @see getInnerIterations */
+    virtual void setInnerIterations(int val) = 0;
     //! @brief Outer iterations (number of inner loops) used in the numerical scheme
-    CV_PURE_PROPERTY(int, OuterIterations)
+    /** @see setOuterIterations */
+    virtual int getOuterIterations() const = 0;
+    /** @copybrief getOuterIterations @see getOuterIterations */
+    virtual void setOuterIterations(int val) = 0;
     //! @brief Use initial flow
-    CV_PURE_PROPERTY(bool, UseInitialFlow)
+    /** @see setUseInitialFlow */
+    virtual bool getUseInitialFlow() const = 0;
+    /** @copybrief getUseInitialFlow @see getUseInitialFlow */
+    virtual void setUseInitialFlow(bool val) = 0;
     //! @brief Step between scales (<1)
-    CV_PURE_PROPERTY(double, ScaleStep)
+    /** @see setScaleStep */
+    virtual double getScaleStep() const = 0;
+    /** @copybrief getScaleStep @see getScaleStep */
+    virtual void setScaleStep(double val) = 0;
     //! @brief Median filter kernel size (1 = no filter) (3 or 5)
-    CV_PURE_PROPERTY(int, MedianFiltering)
+    /** @see setMedianFiltering */
+    virtual int getMedianFiltering() const = 0;
+    /** @copybrief getMedianFiltering @see getMedianFiltering */
+    virtual void setMedianFiltering(int val) = 0;
 };
 
 /** @brief Creates instance of cv::DenseOpticalFlow
diff --git a/modules/video/perf/perf_ecc.cpp b/modules/video/perf/perf_ecc.cpp
index 72410cf57..c706caa07 100644
--- a/modules/video/perf/perf_ecc.cpp
+++ b/modules/video/perf/perf_ecc.cpp
@@ -67,5 +67,5 @@ PERF_TEST_P(TransformationType, findTransformECC, /*testing::ValuesIn(MotionType
         findTransformECC(templateImage, img, warpMat, transform_type,
             TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 5, -1));
     }
-    SANITY_CHECK(warpMat, 1e-3);
+    SANITY_CHECK(warpMat, 3e-3);
 }
diff --git a/modules/video/src/ecc.cpp b/modules/video/src/ecc.cpp
index 8c5fbee79..6e4d6e9f0 100644
--- a/modules/video/src/ecc.cpp
+++ b/modules/video/src/ecc.cpp
@@ -465,6 +465,7 @@ double cv::findTransformECC(InputArray templateImage,
         meanStdDev(templateFloat, tmpMean, tmpStd, imageMask);
 
         subtract(imageWarped,   imgMean, imageWarped, imageMask);//zero-mean input
+        templateZM = Mat::zeros(templateZM.rows, templateZM.cols, templateZM.type());
         subtract(templateFloat, tmpMean, templateZM,  imageMask);//zero-mean template
 
         const double tmpNorm = std::sqrt(countNonZero(imageMask)*(tmpStd.val[0])*(tmpStd.val[0]));
diff --git a/modules/videoio/CMakeLists.txt b/modules/videoio/CMakeLists.txt
index 0b4f391c5..9332f2860 100644
--- a/modules/videoio/CMakeLists.txt
+++ b/modules/videoio/CMakeLists.txt
@@ -27,6 +27,8 @@ set(videoio_hdrs
 set(videoio_srcs
     ${CMAKE_CURRENT_LIST_DIR}/src/cap.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/cap_images.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/src/cap_mjpeg_encoder.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/src/cap_mjpeg_decoder.cpp
     )
 
 file(GLOB videoio_ext_hdrs
diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp
index 01fc41048..ae958bcee 100644
--- a/modules/videoio/include/opencv2/videoio.hpp
+++ b/modules/videoio/include/opencv2/videoio.hpp
@@ -110,8 +110,7 @@ enum { CAP_PROP_POS_MSEC       =0,
        CAP_PROP_CONVERT_RGB   =16,
        CAP_PROP_WHITE_BALANCE_BLUE_U =17,
        CAP_PROP_RECTIFICATION =18,
-       CAP_PROP_MONOCROME     =19,
-       CAP_PROP_MONOCHROME    =CAP_PROP_MONOCROME,
+       CAP_PROP_MONOCHROME    =19,
        CAP_PROP_SHARPNESS     =20,
        CAP_PROP_AUTO_EXPOSURE =21, // DC1394: exposure control done by camera, user can adjust refernce level using this feature
        CAP_PROP_GAMMA         =22,
@@ -217,7 +216,8 @@ enum { CAP_PROP_PVAPI_MULTICASTIP           = 300, // ip for anable multicast ma
        CAP_PROP_PVAPI_DECIMATIONHORIZONTAL  = 302, // Horizontal sub-sampling of the image
        CAP_PROP_PVAPI_DECIMATIONVERTICAL    = 303, // Vertical sub-sampling of the image
        CAP_PROP_PVAPI_BINNINGX              = 304, // Horizontal binning factor
-       CAP_PROP_PVAPI_BINNINGY              = 305  // Vertical binning factor
+       CAP_PROP_PVAPI_BINNINGY              = 305, // Vertical binning factor
+       CAP_PROP_PVAPI_PIXELFORMAT           = 306  // Pixel format
      };
 
 // PVAPI: FrameStartTriggerMode
@@ -235,6 +235,17 @@ enum { CAP_PVAPI_DECIMATION_OFF       = 1,    // Off
        CAP_PVAPI_DECIMATION_2OUTOF16  = 8     // 2 out of 16 decimation
      };
 
+// PVAPI: PixelFormat
+enum { CAP_PVAPI_PIXELFORMAT_MONO8    = 1,    // Mono8
+       CAP_PVAPI_PIXELFORMAT_MONO16   = 2,    // Mono16
+       CAP_PVAPI_PIXELFORMAT_BAYER8   = 3,    // Bayer8
+       CAP_PVAPI_PIXELFORMAT_BAYER16  = 4,    // Bayer16
+       CAP_PVAPI_PIXELFORMAT_RGB24    = 5,    // Rgb24
+       CAP_PVAPI_PIXELFORMAT_BGR24    = 6,    // Bgr24
+       CAP_PVAPI_PIXELFORMAT_RGBA32   = 7,    // Rgba32
+       CAP_PVAPI_PIXELFORMAT_BGRA32   = 8     // Bgra32 (last enumerator: no trailing comma, C++98-clean)
+     };
+
 // Properties of cameras available through XIMEA SDK interface
 enum { CAP_PROP_XI_DOWNSAMPLING  = 400, // Change image resolution by binning or skipping.
        CAP_PROP_XI_DATA_FORMAT   = 401, // Output data format.
@@ -365,6 +376,9 @@ enum { CAP_INTELPERC_DEPTH_MAP              = 0, // Each pixel is a 16-bit integ
        CAP_INTELPERC_IMAGE                  = 3
      };
 
+enum { VIDEOWRITER_PROP_QUALITY = 1,    // Quality (0..100%) of the videostream encoded
+       VIDEOWRITER_PROP_FRAMEBYTES = 2  // (Read-only): Size of just encoded video frame
+     };
 
 class IVideoCapture;
 
@@ -575,10 +589,10 @@ public:
 protected:
     Ptr<CvCapture> cap;
     Ptr<IVideoCapture> icap;
-private:
-    static Ptr<IVideoCapture> createCameraCapture(int index);
 };
 
+class IVideoWriter;
+
 /** @brief Video writer class.
  */
 class CV_EXPORTS_W VideoWriter
@@ -631,6 +645,25 @@ public:
      */
     CV_WRAP virtual void write(const Mat& image);
 
+    /** @brief Sets a property in the VideoWriter.
+
+     @param propId Property identifier. It can be one of the following:
+     -   **VIDEOWRITER_PROP_QUALITY** Quality (0..100%) of the videostream encoded. Can be adjusted dynamically in some codecs.
+     @param value Value of the property.
+     */
+    CV_WRAP virtual bool set(int propId, double value);
+
+    /** @brief Returns the specified VideoWriter property
+
+     @param propId Property identifier. It can be one of the following:
+     -   **VIDEOWRITER_PROP_QUALITY** Current quality of the encoded videostream.
+     -   **VIDEOWRITER_PROP_FRAMEBYTES** (Read-only) Size of just encoded video frame; note that the encoding order may be different from representation order.
+
+     **Note**: When querying a property that is not supported by the backend used by the VideoWriter
+     class, value 0 is returned.
+     */
+    CV_WRAP virtual double get(int propId) const;
+
     /** @brief Concatenates 4 chars to a fourcc code
 
     This static method constructs the fourcc code of the codec to be used in the constructor
@@ -640,6 +673,10 @@ public:
 
 protected:
     Ptr<CvVideoWriter> writer;
+    Ptr<IVideoWriter> iwriter;
+
+    static Ptr<IVideoWriter> create(const String& filename, int fourcc, double fps,
+                                    Size frameSize, bool isColor = true);
 };
 
 template<> CV_EXPORTS void DefaultDeleter<CvCapture>::operator ()(CvCapture* obj) const;
diff --git a/modules/videoio/include/opencv2/videoio/videoio_c.h b/modules/videoio/include/opencv2/videoio/videoio_c.h
index 5ab734531..767cef916 100644
--- a/modules/videoio/include/opencv2/videoio/videoio_c.h
+++ b/modules/videoio/include/opencv2/videoio/videoio_c.h
@@ -160,7 +160,6 @@ enum
     CV_CAP_PROP_CONVERT_RGB   =16,
     CV_CAP_PROP_WHITE_BALANCE_BLUE_U =17,
     CV_CAP_PROP_RECTIFICATION =18,
-    CV_CAP_PROP_MONOCROME     =19,
     CV_CAP_PROP_MONOCHROME    =19,
     CV_CAP_PROP_SHARPNESS     =20,
     CV_CAP_PROP_AUTO_EXPOSURE =21, // exposure control done by camera,
@@ -227,6 +226,7 @@ enum
     CV_CAP_PROP_PVAPI_DECIMATIONVERTICAL    = 303, // Vertical sub-sampling of the image
     CV_CAP_PROP_PVAPI_BINNINGX              = 304, // Horizontal binning factor
     CV_CAP_PROP_PVAPI_BINNINGY              = 305, // Vertical binning factor
+    CV_CAP_PROP_PVAPI_PIXELFORMAT           = 306, // Pixel format
 
     // Properties of cameras available through XIMEA SDK interface
     CV_CAP_PROP_XI_DOWNSAMPLING  = 400,      // Change image resolution by binning or skipping.
diff --git a/modules/videoio/src/cap.cpp b/modules/videoio/src/cap.cpp
index 09fa1c081..b5a44da3f 100644
--- a/modules/videoio/src/cap.cpp
+++ b/modules/videoio/src/cap.cpp
@@ -499,6 +499,67 @@ CV_IMPL void cvReleaseVideoWriter( CvVideoWriter** pwriter )
 namespace cv
 {
 
+static Ptr<IVideoCapture> IVideoCapture_create(int index) // camera factory for the compiled-in IVideoCapture backends
+{
+    int  domains[] =
+    {
+#ifdef HAVE_DSHOW
+        CV_CAP_DSHOW,
+#endif
+#ifdef HAVE_INTELPERC
+        CV_CAP_INTELPERC,
+#endif
+        -1, -1 // negative entries end the search loop; two of them keep slots [0] and [1] valid for the override below
+    };
+
+    // The hundreds part of `index` selects a preferred backend (0 = autodetect); the remainder is the device number.
+    int pref = (index / 100) * 100;
+    if (pref)
+    {
+        domains[0]=pref;
+        index %= 100;
+        domains[1]=-1;
+    }
+
+    // Try each listed backend in order and return the first capture that reports isOpened().
+    for (int i = 0; domains[i] >= 0; i++)
+    {
+#if defined(HAVE_DSHOW)        || \
+    defined(HAVE_INTELPERC)    || \
+    (0)
+        Ptr<IVideoCapture> capture;
+
+        switch (domains[i])
+        {
+#ifdef HAVE_DSHOW
+            case CV_CAP_DSHOW:
+                capture = makePtr<VideoCapture_DShow>(index);
+                break; // CV_CAP_DSHOW
+#endif
+#ifdef HAVE_INTELPERC
+            case CV_CAP_INTELPERC:
+                capture = makePtr<VideoCapture_IntelPerC>();
+                break; // CV_CAP_INTEL_PERC
+#endif
+        }
+        if (capture && capture->isOpened())
+            return capture;
+#endif
+    }
+
+    // No backend could open the camera: hand back an empty pointer.
+    return Ptr<IVideoCapture>();
+}
+
+
+static Ptr<IVideoWriter> IVideoWriter_create(const String& filename, int _fourcc, double fps, Size frameSize, bool isColor)
+{
+    // Only the MJPG fourcc is served by a C++ backend; any other codec yields an empty pointer.
+    if( _fourcc != CV_FOURCC('M', 'J', 'P', 'G') )
+        return Ptr<IVideoWriter>();
+    return createMotionJpegWriter(filename, fps, frameSize, isColor);
+}
+
 VideoCapture::VideoCapture()
 {}
 
@@ -528,7 +589,7 @@ bool VideoCapture::open(const String& filename)
 bool VideoCapture::open(int device)
 {
     if (isOpened()) release();
-    icap = createCameraCapture(device);
+    icap = IVideoCapture_create(device);
     if (!icap.empty())
         return true;
     cap.reset(cvCreateCameraCapture(device));
@@ -609,59 +670,6 @@ double VideoCapture::get(int propId) const
     return icvGetCaptureProperty(cap, propId);
 }
 
-Ptr<IVideoCapture> VideoCapture::createCameraCapture(int index)
-{
-    int  domains[] =
-    {
-#ifdef HAVE_DSHOW
-        CV_CAP_DSHOW,
-#endif
-#ifdef HAVE_INTELPERC
-        CV_CAP_INTELPERC,
-#endif
-        -1, -1
-    };
-
-    // interpret preferred interface (0 = autodetect)
-    int pref = (index / 100) * 100;
-    if (pref)
-    {
-        domains[0]=pref;
-        index %= 100;
-        domains[1]=-1;
-    }
-
-    // try every possibly installed camera API
-    for (int i = 0; domains[i] >= 0; i++)
-    {
-#if defined(HAVE_DSHOW)        || \
-    defined(HAVE_INTELPERC)    || \
-    (0)
-        Ptr<IVideoCapture> capture;
-
-        switch (domains[i])
-        {
-#ifdef HAVE_DSHOW
-        case CV_CAP_DSHOW:
-            capture = makePtr<VideoCapture_DShow>(index);
-            if (capture && capture.dynamicCast<VideoCapture_DShow>()->isOpened())
-                return capture;
-            break; // CV_CAP_DSHOW
-#endif
-#ifdef HAVE_INTELPERC
-        case CV_CAP_INTELPERC:
-            capture = makePtr<VideoCapture_IntelPerC>();
-            if (capture && capture.dynamicCast<VideoCapture_IntelPerC>()->isOpened())
-                return capture;
-            break; // CV_CAP_INTEL_PERC
-#endif
-        }
-#endif
-    }
-
-    // failed open a camera
-    return Ptr<IVideoCapture>();
-}
 
 VideoWriter::VideoWriter()
 {}
@@ -673,6 +681,7 @@ VideoWriter::VideoWriter(const String& filename, int _fourcc, double fps, Size f
 
 void VideoWriter::release()
 {
+    iwriter.release(); // release the C++ backend writer in addition to the legacy one below
     writer.release();
 }
 
@@ -683,19 +692,43 @@ VideoWriter::~VideoWriter()
 
 bool VideoWriter::open(const String& filename, int _fourcc, double fps, Size frameSize, bool isColor)
 {
+    if (isOpened()) release(); // drop any writer left over from a previous open()
+    iwriter = IVideoWriter_create(filename, _fourcc, fps, frameSize, isColor); // C++ backend first (empty unless fourcc is MJPG)
+    if (!iwriter.empty())
+        return true; // the legacy C writer is not created in this case
     writer.reset(cvCreateVideoWriter(filename.c_str(), _fourcc, fps, frameSize, isColor));
     return isOpened();
 }
 
 bool VideoWriter::isOpened() const
 {
-    return !writer.empty();
+    return !(iwriter.empty() && writer.empty()); // opened when either the C++ or the legacy writer exists
+}
+
+
+bool VideoWriter::set(int propId, double value)
+{
+    // Properties are forwarded to the C++ backend only; without one the call reports failure.
+    if (iwriter.empty()) return false;
+    return iwriter->setProperty(propId, value);
+}
+
+double VideoWriter::get(int propId) const
+{
+    // Properties are read from the C++ backend only; without one the result is 0.
+    if (iwriter.empty()) return 0.;
+    return iwriter->getProperty(propId);
 }
 
 void VideoWriter::write(const Mat& image)
 {
-    IplImage _img = image;
-    cvWriteFrame(writer, &_img);
+    if( iwriter )
+    {
+        iwriter->write(image); // C++ backend takes the Mat as is
+        return;
+    }
+    IplImage _img = image; // legacy path: hand the frame to the C API as an IplImage
+    cvWriteFrame(writer, &_img);
 }
 
 VideoWriter& VideoWriter::operator << (const Mat& image)
diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp
index 013d08e54..c8c63fcb7 100644
--- a/modules/videoio/src/cap_dshow.cpp
+++ b/modules/videoio/src/cap_dshow.cpp
@@ -2257,7 +2257,7 @@ int videoInput::getVideoPropertyFromCV(int cv_property){
         case CV_CAP_PROP_GAMMA:
             return VideoProcAmp_Gamma;
 
-        case CV_CAP_PROP_MONOCROME:
+        case CV_CAP_PROP_MONOCHROME:
             return VideoProcAmp_ColorEnable;
 
         case CV_CAP_PROP_WHITE_BALANCE_BLUE_U:
@@ -3170,7 +3170,7 @@ double VideoCapture_DShow::getProperty(int propIdx) const
     case CV_CAP_PROP_SATURATION:
     case CV_CAP_PROP_SHARPNESS:
     case CV_CAP_PROP_GAMMA:
-    case CV_CAP_PROP_MONOCROME:
+    case CV_CAP_PROP_MONOCHROME:
     case CV_CAP_PROP_WHITE_BALANCE_BLUE_U:
     case CV_CAP_PROP_BACKLIGHT:
     case CV_CAP_PROP_GAIN:
@@ -3273,7 +3273,7 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal)
     case CV_CAP_PROP_SATURATION:
     case CV_CAP_PROP_SHARPNESS:
     case CV_CAP_PROP_GAMMA:
-    case CV_CAP_PROP_MONOCROME:
+    case CV_CAP_PROP_MONOCHROME:
     case CV_CAP_PROP_WHITE_BALANCE_BLUE_U:
     case CV_CAP_PROP_BACKLIGHT:
     case CV_CAP_PROP_GAIN:
diff --git a/modules/videoio/src/cap_dshow.hpp b/modules/videoio/src/cap_dshow.hpp
index 9b906c8bf..46998c186 100644
--- a/modules/videoio/src/cap_dshow.hpp
+++ b/modules/videoio/src/cap_dshow.hpp
@@ -32,7 +32,7 @@ public:
     virtual bool grabFrame();
     virtual bool retrieveFrame(int outputType, OutputArray frame);
     virtual int getCaptureDomain();
-    bool isOpened() const;
+    virtual bool isOpened() const;
 protected:
     void open(int index);
     void close();
diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp
index 5e371d21e..c57e3c0e4 100644
--- a/modules/videoio/src/cap_ffmpeg_impl.hpp
+++ b/modules/videoio/src/cap_ffmpeg_impl.hpp
@@ -657,13 +657,13 @@ bool CvCapture_FFMPEG::grabFrame()
         frame_number > ic->streams[video_stream]->nb_frames )
         return false;
 
-    av_free_packet (&packet);
-
     picture_pts = AV_NOPTS_VALUE_;
 
     // get the next frame
     while (!valid)
     {
+
+        av_free_packet (&packet);
         int ret = av_read_frame(ic, &packet);
         if (ret == AVERROR(EAGAIN)) continue;
 
@@ -706,8 +706,6 @@ bool CvCapture_FFMPEG::grabFrame()
             if (count_errs > max_number_of_attempts)
                 break;
         }
-
-        av_free_packet (&packet);
     }
 
     if( valid && first_frame_number < 0 )
diff --git a/modules/videoio/src/cap_intelperc.hpp b/modules/videoio/src/cap_intelperc.hpp
index e154fa320..430a714f0 100644
--- a/modules/videoio/src/cap_intelperc.hpp
+++ b/modules/videoio/src/cap_intelperc.hpp
@@ -100,7 +100,7 @@ public:
     virtual bool grabFrame();
     virtual bool retrieveFrame(int outputType, OutputArray frame);
     virtual int getCaptureDomain();
-    bool isOpened() const;
+    virtual bool isOpened() const;
 protected:
     bool m_contextOpened;
 
diff --git a/modules/videoio/src/cap_mjpeg_decoder.cpp b/modules/videoio/src/cap_mjpeg_decoder.cpp
new file mode 100644
index 000000000..11a86b2ab
--- /dev/null
+++ b/modules/videoio/src/cap_mjpeg_decoder.cpp
@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+namespace cv
+{
+// Stub: ignores the file name and always returns an empty pointer, so no MJPEG capture is ever created here.
+Ptr<IVideoCapture> createMotionJpegCapture(const String&)
+{
+    return Ptr<IVideoCapture>();
+}
+
+}
diff --git a/modules/videoio/src/cap_mjpeg_encoder.cpp b/modules/videoio/src/cap_mjpeg_encoder.cpp
new file mode 100644
index 000000000..7856fd416
--- /dev/null
+++ b/modules/videoio/src/cap_mjpeg_encoder.cpp
@@ -0,0 +1,1464 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <vector>
+
+#if CV_NEON
+#define WITH_NEON
+#endif
+
+namespace cv
+{
+namespace mjpeg
+{
+
+enum { COLORSPACE_GRAY=0, COLORSPACE_RGBA=1, COLORSPACE_BGR=2, COLORSPACE_YUV444P=3 }; // input layouts selected in MotionJpegWriter::write()
+// fourCC(a,b,c,d): packs four characters into one int with 'a' in the lowest byte.
+#define fourCC(a,b,c,d)   ((int)((uchar(d)<<24) | (uchar(c)<<16) | (uchar(b)<<8) | uchar(a)))
+// Values MotionJpegWriter writes into the AVI headers and index.
+static const int AVIH_STRH_SIZE = 56;          // size field written after both the 'avih' and the 'strh' tags
+static const int STRF_SIZE = 40;               // first field of the 'strf' chunk
+static const int AVI_DWFLAG = 0x00000910;      // flags word of the 'avih' header
+static const int AVI_DWSCALE = 1;              // written in 'strh' right before the frame rate
+static const int AVI_DWQUALITY = -1;           // quality field of 'strh'
+static const int JUNK_SEEK = 4096;             // the JUNK chunk pads the file up to this offset
+static const int AVIIF_KEYFRAME = 0x10;        // flag stored with every 'idx1' entry
+static const int MAX_BYTES_PER_SEC = 99999999; // written in 'avih'
+static const int SUG_BUFFER_SIZE = 1048576;    // written in both 'avih' and 'strh'
+// bit_mask[n] has the n lowest bits set, for n = 0..32.
+static const unsigned bit_mask[] =
+{
+    0,
+    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};
+
+class BitStream // buffered byte sink over a FILE*: little-endian put*() for AVI fields, big-endian jput*() for JPEG data
+{
+public:
+    enum
+    {
+        DEFAULT_BLOCK_SIZE = (1 << 15), // the buffer is flushed once this many bytes are pending
+        huff_val_shift = 20, // a source-table entry is (value << huff_val_shift) | code
+        huff_code_mask = (1 << huff_val_shift) - 1 // extracts the code bits of such an entry
+    };
+
+    BitStream() : m_pos(0), m_is_opened(false), m_f(0)
+    {
+        m_buf.resize(DEFAULT_BLOCK_SIZE + 1024); // 1024 spare bytes: the writers store first and compare with m_end afterwards
+        m_start = &m_buf[0];
+        m_end = m_start + DEFAULT_BLOCK_SIZE;
+        // close() and writeBlock() read m_current/m_pos even when open() was never called, so they need defined values.
+        m_current = m_start;
+    }
+
+    ~BitStream()
+    {
+        close();
+    }
+    // Closes any current file, then opens `filename` with mode "wb" and resets the buffer; returns false if fopen fails.
+    bool open(const String& filename)
+    {
+        close();
+        m_f = fopen(filename.c_str(), "wb");
+        if( !m_f )
+            return false;
+        m_current = m_start;
+        m_pos = 0;
+        return true;
+    }
+
+    bool isOpened() const { return m_f != 0; } // true while a file handle is held
+    // Flushes pending bytes and closes the file; safe to call when nothing is open.
+    void close()
+    {
+        writeBlock();
+        if( m_f )
+            fclose(m_f);
+        m_f = 0;
+    }
+    // Writes the buffered bytes to the file (if one is open), advances m_pos by their count and empties the buffer.
+    void writeBlock()
+    {
+        size_t wsz0 = m_current - m_start;
+        if( wsz0 > 0 && m_f )
+        {
+            size_t wsz = fwrite(m_start, 1, wsz0, m_f);
+            CV_Assert( wsz == wsz0 ); // a short write raises an OpenCV error
+        }
+        m_pos += wsz0;
+        m_current = m_start;
+    }
+    // Logical stream position: bytes already flushed plus bytes still in the buffer.
+    size_t getPos() const
+    {
+        return (size_t)(m_current - m_start) + m_pos;
+    }
+    // Appends the low 8 bits of `val`.
+    void putByte(int val)
+    {
+        *m_current++ = (uchar)val;
+        if( m_current >= m_end )
+            writeBlock();
+    }
+    // Appends `count` raw bytes from `buf`, flushing whenever the block fills up; requires an open file.
+    void putBytes(const uchar* buf, int count)
+    {
+        uchar* data = (uchar*)buf;
+        CV_Assert(m_f && data && m_current && count >= 0);
+        if( m_current >= m_end )
+            writeBlock();
+
+        while( count )
+        {
+            int l = (int)(m_end - m_current); // room left before the flush threshold
+
+            if (l > count)
+                l = count;
+
+            if( l > 0 )
+            {
+                memcpy(m_current, data, l);
+                m_current += l;
+                data += l;
+                count -= l;
+            }
+            if( m_current >= m_end )
+                writeBlock();
+        }
+    }
+    // Appends the low 16 bits of `val`, least significant byte first.
+    void putShort(int val)
+    {
+        m_current[0] = (uchar)val;
+        m_current[1] = (uchar)(val >> 8);
+        m_current += 2;
+        if( m_current >= m_end )
+            writeBlock();
+    }
+    // Appends `val` as 4 bytes, least significant byte first.
+    void putInt(int val)
+    {
+        m_current[0] = (uchar)val;
+        m_current[1] = (uchar)(val >> 8);
+        m_current[2] = (uchar)(val >> 16);
+        m_current[3] = (uchar)(val >> 24);
+        m_current += 4;
+        if( m_current >= m_end )
+            writeBlock();
+    }
+    // Appends the low 16 bits of `val`, most significant byte first.
+    void jputShort(int val)
+    {
+        m_current[0] = (uchar)(val >> 8);
+        m_current[1] = (uchar)val;
+        m_current += 2;
+        if( m_current >= m_end )
+            writeBlock();
+    }
+    // Overwrites the 4-byte little-endian int at stream position `pos`, either in the buffer or in the file.
+    void patchInt(int val, size_t pos)
+    {
+        if( pos >= m_pos )
+        {
+            ptrdiff_t delta = pos - m_pos;
+            CV_Assert( delta + 4 <= m_current - m_start ); // all four patched bytes must already be buffered
+            m_start[delta] = (uchar)val;
+            m_start[delta+1] = (uchar)(val >> 8);
+            m_start[delta+2] = (uchar)(val >> 16);
+            m_start[delta+3] = (uchar)(val >> 24);
+        }
+        else
+        {
+            long fpos = ftell(m_f); // remember the append position so it can be restored below
+            fseek(m_f, (long)pos, SEEK_SET);
+            uchar buf[] = { (uchar)val, (uchar)(val >> 8), (uchar)(val >> 16), (uchar)(val >> 24) };
+            fwrite(buf, 1, 4, m_f);
+            fseek(m_f, fpos, SEEK_SET);
+        }
+    }
+    // Appends the 4 bytes of `currval`, most significant first, writing an extra 0 after every 0xFF byte.
+    void jput(unsigned currval)
+    {
+        uchar v;
+        uchar* ptr = m_current;
+        v = (uchar)(currval >> 24);
+        *ptr++ = v;
+        if( v == 255 )
+            *ptr++ = 0;
+        v = (uchar)(currval >> 16);
+        *ptr++ = v;
+        if( v == 255 )
+            *ptr++ = 0;
+        v = (uchar)(currval >> 8);
+        *ptr++ = v;
+        if( v == 255 )
+            *ptr++ = 0;
+        v = (uchar)currval;
+        *ptr++ = v;
+        if( v == 255 )
+            *ptr++ = 0;
+        m_current = ptr;
+        if( m_current >= m_end )
+            writeBlock();
+    }
+    // Builds a lookup from `src` (createSourceHuffmanTable() layout): table[0]=min value, table[1]=span, then (code << 8) | length per value.
+    static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
+    {
+        int  i, k;
+        int  min_val = INT_MAX, max_val = INT_MIN;
+        int  size;
+
+        /* calc min and max values in the table */
+        for( i = 1, k = 1; src[k] >= 0; i++ )
+        {
+            int code_count = src[k++];
+
+            for( code_count += k; k < code_count; k++ )
+            {
+                int  val = src[k] >> huff_val_shift;
+                if( val < min_val )
+                    min_val = val;
+                if( val > max_val )
+                    max_val = val;
+            }
+        }
+
+        size = max_val - min_val + 3;
+
+        if( size > max_size )
+        {
+            CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");
+            return false;
+        }
+
+        memset( table, 0, size*sizeof(table[0]));
+
+        table[0] = min_val;
+        table[1] = size - 2;
+
+        for( i = 1, k = 1; src[k] >= 0; i++ )
+        {
+            int code_count = src[k++];
+
+            for( code_count += k; k < code_count; k++ )
+            {
+                int  val = src[k] >> huff_val_shift;
+                int  code = src[k] & huff_code_mask;
+
+                table[val - min_val + 2] = (code << 8) | i; // i is the 1-based index of the length group being read
+            }
+        }
+        return true;
+    }
+    // Expands a byte table (max_bits per-length counts, then values) into: first_bits, {count, entries...} per length, -1 terminator.
+    static int* createSourceHuffmanTable(const uchar* src, int* dst,
+                                         int max_bits, int first_bits)
+    {
+        int   i, val_idx, code = 0;
+        int*  table = dst;
+        *dst++ = first_bits;
+        for (i = 1, val_idx = max_bits; i <= max_bits; i++)
+        {
+            int code_count = src[i - 1];
+            dst[0] = code_count;
+            code <<= 1;
+            for (int k = 0; k < code_count; k++)
+            {
+                dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
+            }
+            code += code_count;
+            dst += code_count + 1;
+            val_idx += code_count;
+        }
+        dst[0] = -1;
+        return  table;
+    }
+
+protected:
+    std::vector<uchar> m_buf; // backing storage for the write buffer
+    uchar*  m_start;          // first byte of the buffer
+    uchar*  m_end;            // flush threshold (m_start + DEFAULT_BLOCK_SIZE)
+    uchar*  m_current;        // next byte to be written
+    size_t  m_pos;            // number of bytes flushed so far
+    bool    m_is_opened;      // set to false at construction; isOpened() tests m_f instead
+    FILE*   m_f;              // output file, 0 when closed
+};
+
+
+class MotionJpegWriter : public IVideoWriter // IVideoWriter that stores frames in an AVI file under the 'MJPG' fourcc
+{
+public:
+    MotionJpegWriter() { rawstream = false; }
+    MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
+    {
+        rawstream = false;
+        open(filename, fps, size, iscolor);
+    }
+    ~MotionJpegWriter() { close(); }
+    // Finalizes the AVI (index and patched sizes, only if at least one frame was written), closes the file, clears bookkeeping.
+    void close()
+    {
+        if( !strm.isOpened() )
+            return;
+
+        if( !frameOffset.empty() && !rawstream )
+        {
+            endWriteChunk(); // end LIST 'movi'
+            writeIndex();
+            finishWriteAVI();
+        }
+        strm.close();
+        frameOffset.clear();
+        frameSize.clear();
+        AVIChunkSizeIndex.clear();
+        frameNumIndexes.clear();
+    }
+    // Starts a new .avi file and writes its headers; returns false for a non-.avi name or when the file cannot be opened.
+    bool open(const String& filename, double fps, Size size, bool iscolor)
+    {
+        close();
+
+        if( filename.empty() )
+            return false;
+        const char* ext = strrchr(filename.c_str(), '.');
+        if( !ext )
+            return false;
+        if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
+            return false;
+
+        // Validate fps before touching the file system, so a bad value cannot truncate an existing file.
+        CV_Assert(fps >= 1);
+        if( !strm.open(filename) )
+            return false;
+
+        outfps = cvRound(fps);
+        width = size.width;
+        height = size.height;
+        quality = 75;
+        rawstream = false;
+        channels = iscolor ? 3 : 1;
+
+        if( !rawstream )
+        {
+            startWriteAVI();
+            writeStreamHeader();
+        }
+
+        return true;
+    }
+
+    bool isOpened() const { return strm.isOpened(); }
+    // Writes the start of the file: the RIFF 'AVI ' chunk, the 'hdrl' list and the 'avih' main header.
+    void startWriteAVI()
+    {
+        startWriteChunk(fourCC('R', 'I', 'F', 'F'));
+
+        strm.putInt(fourCC('A', 'V', 'I', ' '));
+
+        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
+
+        strm.putInt(fourCC('h', 'd', 'r', 'l'));
+        strm.putInt(fourCC('a', 'v', 'i', 'h'));
+        strm.putInt(AVIH_STRH_SIZE);
+        strm.putInt(cvRound(1e6 / outfps)); // microseconds per frame
+        strm.putInt(MAX_BYTES_PER_SEC);
+        strm.putInt(0);
+        strm.putInt(AVI_DWFLAG);
+
+        frameNumIndexes.push_back(strm.getPos()); // frame count goes here; patched in finishWriteAVI()
+
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(1); // number of streams
+        strm.putInt(SUG_BUFFER_SIZE);
+        strm.putInt(width);
+        strm.putInt(height);
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(0);
+    }
+    // Writes the 'strl' list (strh + strf), the 'odml' list and the JUNK padding, then opens the 'movi' list.
+    void writeStreamHeader()
+    {
+        // strh
+        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
+
+        strm.putInt(fourCC('s', 't', 'r', 'l'));
+        strm.putInt(fourCC('s', 't', 'r', 'h'));
+        strm.putInt(AVIH_STRH_SIZE);
+        strm.putInt(fourCC('v', 'i', 'd', 's'));
+        strm.putInt(fourCC('M', 'J', 'P', 'G'));
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(AVI_DWSCALE);
+        strm.putInt(outfps);
+        strm.putInt(0);
+
+        frameNumIndexes.push_back(strm.getPos()); // frame count goes here; patched in finishWriteAVI()
+
+        strm.putInt(0);
+        strm.putInt(SUG_BUFFER_SIZE);
+        strm.putInt(AVI_DWQUALITY);
+        strm.putInt(0);
+        strm.putShort(0);
+        strm.putShort(0);
+        strm.putShort(width);
+        strm.putShort(height);
+
+        // strf (use the BITMAPINFOHEADER for video)
+        startWriteChunk(fourCC('s', 't', 'r', 'f'));
+
+        strm.putInt(STRF_SIZE);
+        strm.putInt(width);
+        strm.putInt(height);
+        strm.putShort(1); // planes (1 means interleaved data (after decompression))
+
+        strm.putShort(8 * channels); // bits per pixel: 8 bits for each of the 1 or 3 channels
+        strm.putInt(fourCC('M', 'J', 'P', 'G'));
+        strm.putInt(width * height * channels);
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(0);
+        strm.putInt(0);
+        // Must be indx chunk
+        endWriteChunk(); // end strf
+        endWriteChunk(); // end strl
+
+        // odml
+        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
+        strm.putInt(fourCC('o', 'd', 'm', 'l'));
+        startWriteChunk(fourCC('d', 'm', 'l', 'h'));
+
+        frameNumIndexes.push_back(strm.getPos()); // frame count goes here; patched in finishWriteAVI()
+
+        strm.putInt(0);
+        strm.putInt(0);
+
+        endWriteChunk(); // end dmlh
+        endWriteChunk(); // end odml
+
+        endWriteChunk(); // end hdrl
+
+        // JUNK
+        startWriteChunk(fourCC('J', 'U', 'N', 'K'));
+        size_t pos = strm.getPos();
+        for( ; pos < (size_t)JUNK_SEEK; pos += 4 )
+            strm.putInt(0);
+        endWriteChunk(); // end JUNK
+        // movi
+        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
+        moviPointer = strm.getPos(); // frame offsets in the index are relative to this position
+        strm.putInt(fourCC('m', 'o', 'v', 'i'));
+    }
+    // Writes a chunk tag plus a zero size placeholder and remembers where the size must be patched.
+    void startWriteChunk(int fourcc)
+    {
+        CV_Assert(fourcc != 0);
+        strm.putInt(fourcc);
+
+        AVIChunkSizeIndex.push_back(strm.getPos());
+        strm.putInt(0);
+    }
+    // Closes the most recently started chunk by patching its size field with the bytes written since.
+    void endWriteChunk()
+    {
+        if( !AVIChunkSizeIndex.empty() )
+        {
+            size_t currpos = strm.getPos();
+            size_t pospos = AVIChunkSizeIndex.back();
+            AVIChunkSizeIndex.pop_back();
+            int chunksz = (int)(currpos - (pospos + 4)); // payload starts right after the 4-byte size field
+            strm.patchInt(chunksz, pospos);
+        }
+    }
+    // Writes the 'idx1' chunk: one entry (tag, keyframe flag, offset, size) per recorded frame.
+    void writeIndex()
+    {
+        // old style AVI index. Must be Open-DML index
+        startWriteChunk(fourCC('i', 'd', 'x', '1'));
+        int nframes = (int)frameOffset.size();
+        for( int i = 0; i < nframes; i++ )
+        {
+            strm.putInt(fourCC('0', '0', 'd', 'c'));
+            strm.putInt(AVIIF_KEYFRAME);
+            strm.putInt((int)frameOffset[i]);
+            strm.putInt((int)frameSize[i]);
+        }
+        endWriteChunk(); // End idx1
+    }
+    // Patches the frame count into every recorded header slot and closes the outer RIFF chunk.
+    void finishWriteAVI()
+    {
+        int nframes = (int)frameOffset.size();
+        // Record frames numbers to AVI Header
+        while (!frameNumIndexes.empty())
+        {
+            size_t ppos = frameNumIndexes.back();
+            frameNumIndexes.pop_back();
+            strm.patchInt(nframes, ppos);
+        }
+        endWriteChunk(); // end RIFF
+    }
+    // Encodes one frame as a '00dc' chunk; the image size and channel count must match what open() was given.
+    void write(InputArray _img)
+    {
+        Mat img = _img.getMat();
+        size_t chunkPointer = strm.getPos();
+        int input_channels = img.channels();
+        int colorspace = -1;
+
+        if( input_channels == 1 && channels == 1 )
+        {
+            CV_Assert( img.cols == width && img.rows == height );
+            colorspace = COLORSPACE_GRAY;
+        }
+        else if( input_channels == 4 )
+        {
+            CV_Assert( img.cols == width && img.rows == height && channels == 3 );
+            colorspace = COLORSPACE_RGBA;
+        }
+        else if( input_channels == 3 )
+        {
+            CV_Assert( img.cols == width && img.rows == height && channels == 3 );
+            colorspace = COLORSPACE_BGR;
+        }
+        else if( input_channels == 1 && channels == 3 )
+        {
+            CV_Assert( img.cols == width && img.rows == height*3 ); // single-channel image three frame-heights tall
+            colorspace = COLORSPACE_YUV444P;
+        }
+        else
+            CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");
+
+        if( !rawstream )
+            startWriteChunk(fourCC('0', '0', 'd', 'c'));
+
+        writeFrameData(img.data, (int)img.step, colorspace, input_channels);
+
+        if( !rawstream )
+        {
+            frameOffset.push_back(chunkPointer - moviPointer);
+            frameSize.push_back(strm.getPos() - chunkPointer - 8);       // Size excludes '00dc' and size field
+            endWriteChunk(); // end '00dc'
+        }
+    }
+    // Returns the quality setting or the byte size of the last written frame; 0 for any other property.
+    double getProperty(int propId) const
+    {
+        if( propId == VIDEOWRITER_PROP_QUALITY )
+            return quality;
+        if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
+            return frameSize.empty() ? 0. : (double)frameSize.back();
+        return 0.;
+    }
+    // Stores a new quality value (no range check here); every other property is rejected with false.
+    bool setProperty(int propId, double value)
+    {
+        if( propId == VIDEOWRITER_PROP_QUALITY )
+        {
+            quality = value;
+            return true;
+        }
+        return false;
+    }
+    // Declared here, defined outside the class.
+    void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );
+
+protected:
+    int outfps;                  // frame rate rounded to an integer in open()
+    int width, height, channels; // frame geometry; channels is 3 for color, 1 for grayscale
+    double quality;              // set to 75 by open(), changeable through setProperty()
+    size_t moviPointer;          // stream position just before the 'movi' tag
+    std::vector<size_t> frameOffset, frameSize, AVIChunkSizeIndex, frameNumIndexes;
+    bool rawstream;              // always false in this class; guards the AVI container writes
+
+    BitStream strm;
+};
+
+#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n)) // right shift by n with rounding (adds half of 2^n first)
+#define fix(x, n)   ((int)((x)*(1 << (n)) + .5)) // real constant -> fixed point with n fractional bits; no trailing ';' so it works inside expressions
+
+enum
+{
+    fixb = 14, // fractional bits of the C*_* constants below
+    fixc = 12, // fractional bits of the y_/cb_/cr_ weights below
+    postshift = 14
+};
+// Multipliers in fixb-bit fixed point (presumably for the forward DCT, which lies outside this view).
+static const int C0_707 = fix(0.707106781f, fixb);
+static const int C0_541 = fix(0.541196100f, fixb);
+static const int C0_382 = fix(0.382683432f, fixb);
+static const int C1_306 = fix(1.306562965f, fixb);
+// Per-channel weights in fixc-bit fixed point (presumably the R, G, B contributions to Y, Cb and Cr).
+static const int y_r = fix(0.299, fixc);
+static const int y_g = fix(0.587, fixc);
+static const int y_b = fix(0.114, fixc);
+
+static const int cb_r = -fix(0.1687, fixc);
+static const int cb_g = -fix(0.3313, fixc);
+static const int cb_b = fix(0.5, fixc);
+
+static const int cr_r = fix(0.5, fixc);
+static const int cr_g = -fix(0.4187, fixc);
+static const int cr_b = -fix(0.0813, fixc);
+
+// Standard JPEG quantization tables, 64 entries (8x8) each; the _T suffix suggests transposed storage - TODO confirm.
+static const uchar jpegTableK1_T[] =
+{
+    16, 12, 14, 14,  18,  24,  49,  72,
+    11, 12, 13, 17,  22,  35,  64,  92,
+    10, 14, 16, 22,  37,  55,  78,  95,
+    16, 19, 24, 29,  56,  64,  87,  98,
+    24, 26, 40, 51,  68,  81, 103, 112,
+    40, 58, 57, 87, 109, 104, 121, 100,
+    51, 60, 69, 80, 103, 113, 120, 103,
+    61, 55, 56, 62,  77,  92, 101,  99
+};
+// Second quantization table (presumably for the chroma planes - usage is outside this view).
+static const uchar jpegTableK2_T[] =
+{
+    17, 18, 24, 47, 99, 99, 99, 99,
+    18, 21, 26, 66, 99, 99, 99, 99,
+    24, 26, 56, 99, 99, 99, 99, 99,
+    47, 66, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99
+};
+
+// Standard Huffman tables.
+// Layout matches what BitStream::createSourceHuffmanTable() reads: per-length code counts first (16 here), then the values.
+// Luma DC table.
+static const uchar jpegTableK3[] =
+{
+    0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+};
+
+// Chroma DC table.
+static const uchar jpegTableK4[] =
+{
+    0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+};
+
+// Luma AC table.
+static const uchar jpegTableK5[] =
+{
+    0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
+    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+};
+
+// Chroma AC table.
+static const uchar jpegTableK6[] =
+{
+    0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
+    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+};
+
+static const uchar zigzag[] = // maps scan position j to an index into a 64-entry coefficient/quantizer block
+{
+    0,  8,  1,  2,  9, 16, 24, 17, 10,  3,  4, 11, 18, 25, 32, 40,
+    33, 26, 19, 12,  5,  6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
+    28, 21, 14,  7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
+    23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 // 16 extra entries, all 63; writeFrameData only reads positions 0..63
+};
+
+
+static const int idct_prescale[] = // 8x8 per-coefficient scale table (16384 == 1 << 14 at index 0); writeFrameData divides by it when building fdct_qtab
+{
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+};
+
+static const char jpegHeader[] = // 20 header bytes; writeFrameData emits sizeof(jpegHeader) - 1 so the string terminator is not written
+"\xFF\xD8"  // SOI  - start of image
+"\xFF\xE0"  // APP0 - JFIF extension
+"\x00\x10"  // 2 bytes: length of APP0 segment
+"JFIF\x00"  // JFIF signature
+"\x01\x02"  // version of JFIF
+"\x00"      // units = pixels ( 1 - inch, 2 - cm )
+"\x00\x01\x00\x01" // two 2-byte values: x density & y density
+"\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)
+
+#ifdef WITH_NEON
+// FDCT with postscaling (NEON build): transforms the 8x8 block at src (rows `step` shorts apart) and stores 64 scaled coefficients to dst.
+static void aan_fdct8x8( const short *src, short *dst,
+                        int step, const short *postscale )
+{
+    // Pass 1: load the eight rows src + step*k as 8-lane vectors and run the butterflies lane-wise
+    int16x8_t x0 = vld1q_s16(src);    int16x8_t x1 = vld1q_s16(src + step*7);
+    int16x8_t x2 = vld1q_s16(src + step*3);    int16x8_t x3 = vld1q_s16(src + step*4);
+
+    int16x8_t x4 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
+    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
+
+    int16x8_t t1 = x0; int16x8_t t2 = x2;
+
+    x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
+
+    x0 = vld1q_s16(src + step);    x3 = vld1q_s16(src + step*6);
+
+    x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
+    int16x8_t t3 = x0;
+
+    x0 = vld1q_s16(src + step*2);    x3 = vld1q_s16(src + step*5);
+
+    int16x8_t t4 = vsubq_s16(x0, x3);
+
+    x0 = vaddq_s16(x0, x3);
+    x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
+    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
+
+    int16x8_t res0 = x1;
+    int16x8_t res4 = x2;
+    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
+    x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
+
+    int16x8_t res2 = x4;
+    int16x8_t res6 = x1;
+
+    x0 = t2;    x1 = t4;
+    x2 = t3;    x3 = t1;
+    x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
+    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
+
+    x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
+    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
+    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
+    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
+
+    x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
+    x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
+
+    int16x8_t res1 = x0;
+    int16x8_t res3 = x3;
+    int16x8_t res5 = x1;
+    int16x8_t res7 = x4;
+
+    // transpose the 8x8 matrix held in res0..res7 (layout below) into transp_res0..transp_res7, one 4x4 quadrant at a time
+    /*
+     res0 00 01 02 03 04 05 06 07
+     res1 10 11 12 13 14 15 16 17
+     res2 20 21 22 23 24 25 26 27
+     res3 30 31 32 33 34 35 36 37
+     res4 40 41 42 43 44 45 46 47
+     res5 50 51 52 53 54 55 56 57
+     res6 60 61 62 63 64 65 66 67
+     res7 70 71 72 73 74 75 76 77
+     */
+
+    //transpose elements 00-33
+    int16x4_t res0_0 = vget_low_s16(res0);
+    int16x4_t res1_0 = vget_low_s16(res1);
+    int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
+    int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    res0_0 = vget_low_s16(res2);
+    res1_0 = vget_low_s16(res3);
+    tres = vtrn_s16(res0_0, res1_0);
+    int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    int32x4x2_t tres1 = vtrnq_s32(l0, l1);
+
+    // transpose elements 40-73
+    res0_0 = vget_low_s16(res4);
+    res1_0 = vget_low_s16(res5);
+    tres = vtrn_s16(res0_0, res1_0);
+    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    res0_0 = vget_low_s16(res6);
+    res1_0 = vget_low_s16(res7);
+
+    tres = vtrn_s16(res0_0, res1_0);
+    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    int32x4x2_t tres2 = vtrnq_s32(l0, l1);
+
+    //combine into 0-3
+    int16x8_t transp_res0 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
+    int16x8_t transp_res1 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
+    int16x8_t transp_res2 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
+    int16x8_t transp_res3 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
+
+    // transpose elements 04-37
+    res0_0 = vget_high_s16(res0);
+    res1_0 = vget_high_s16(res1);
+    tres = vtrn_s16(res0_0, res1_0);
+    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    res0_0 = vget_high_s16(res2);
+    res1_0 = vget_high_s16(res3);
+
+    tres = vtrn_s16(res0_0, res1_0);
+    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    tres1 = vtrnq_s32(l0, l1);
+
+    // transpose elements 44-77
+    res0_0 = vget_high_s16(res4);
+    res1_0 = vget_high_s16(res5);
+    tres = vtrn_s16(res0_0, res1_0);
+    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    res0_0 = vget_high_s16(res6);
+    res1_0 = vget_high_s16(res7);
+
+    tres = vtrn_s16(res0_0, res1_0);
+    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));
+
+    tres2 = vtrnq_s32(l0, l1);
+
+    //combine into 4-7
+    int16x8_t transp_res4 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
+    int16x8_t transp_res5 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
+    int16x8_t transp_res6 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
+    int16x8_t transp_res7 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));
+
+    // STORE_DESCALED scales |reg| and re-applies the sign afterwards: a hack for vqdmulhq_s16 producing -1 instead of 0 on negative inputs
+#define STORE_DESCALED(addr, reg, mul_addr)            postscale_line = vld1q_s16((mul_addr)); \
+mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
+reg = vabsq_s16(reg); \
+reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
+reg = vsubq_s16(veorq_s16(reg, mask), mask); \
+vst1q_s16((addr), reg);
+
+    int16x8_t z = vdupq_n_s16(0), postscale_line, mask;
+
+    // pass 2: same butterflies on the transposed rows; each result row k is scaled by postscale[8*k..] and stored at dst[8*k..]
+    x0 = transp_res0;    x1 = transp_res7;
+    x2 = transp_res3;    x3 = transp_res4;
+
+    x4 = vaddq_s16(x0, x1);   x0 = vsubq_s16(x0, x1);
+    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
+
+    t1 = x0; t2 = x2;
+
+    x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);
+
+    x0 = transp_res1;
+    x3 = transp_res6;
+
+    x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
+
+    t3 = x0;
+
+    x0 = transp_res2; x3 = transp_res5;
+
+    t4 = vsubq_s16(x0, x3);
+
+    x0 = vaddq_s16(x0, x3);
+
+    x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
+    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);
+
+    STORE_DESCALED(dst, x1, postscale);
+    STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);
+
+    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
+
+    x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);
+
+    STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);
+    STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);
+
+    x0 = t2; x1 = t4;
+    x2 = t3; x3 = t1;
+
+    x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
+
+    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));
+
+    x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
+
+    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
+    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
+    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);
+
+    x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
+    x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);
+
+    STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);
+    STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);
+    STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);
+    STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);
+}
+
+#else
+// FDCT with postscaling (scalar build): reads the 8x8 block at src (rows `step` shorts apart) and writes 64 scaled coefficients to dst.
+static void aan_fdct8x8( const short *src, short *dst,
+                        int step, const short *postscale )
+{
+    int workspace[64], *work = workspace; // 8x8 intermediate results produced by pass 1
+    int  i;
+
+    // Pass 1: process rows; work[1], work[3], work[5], work[7] serve as scratch before receiving the odd outputs
+    for( i = 8; i > 0; i--, src += step, work += 8 )
+    {
+        int x0 = src[0], x1 = src[7];
+        int x2 = src[3], x3 = src[4];
+
+        int x4 = x0 + x1; x0 -= x1;
+        x1 = x2 + x3; x2 -= x3;
+
+        work[7] = x0; work[1] = x2;
+        x2 = x4 + x1; x4 -= x1;
+
+        x0 = src[1]; x3 = src[6];
+        x1 = x0 + x3; x0 -= x3;
+        work[5] = x0;
+
+        x0 = src[2]; x3 = src[5];
+        work[3] = x0 - x3; x0 += x3;
+
+        x3 = x0 + x1; x0 -= x1;
+        x1 = x2 + x3; x2 -= x3;
+
+        work[0] = x1; work[4] = x2;
+
+        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
+        x1 = x4 + x0; x4 -= x0;
+        work[2] = x4; work[6] = x1;
+
+        x0 = work[1]; x1 = work[3];
+        x2 = work[5]; x3 = work[7];
+
+        x0 += x1; x1 += x2; x2 += x3;
+        x1 = DCT_DESCALE(x1*C0_707, fixb);
+
+        x4 = x1 + x3; x3 -= x1;
+        x1 = (x0 - x2)*C0_382;
+        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
+        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
+
+        x1 = x0 + x3; x3 -= x0;
+        x0 = x4 + x2; x4 -= x2;
+
+        work[5] = x1; work[1] = x0;
+        work[7] = x4; work[3] = x3;
+    }
+
+    work = workspace;
+    // pass 2: process columns; column i of workspace yields dst[8*i..8*i+7], scaled by postscale[8*i..8*i+7]
+    for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
+    {
+        int  x0 = work[8*0], x1 = work[8*7];
+        int  x2 = work[8*3], x3 = work[8*4];
+
+        int  x4 = x0 + x1; x0 -= x1;
+        x1 = x2 + x3; x2 -= x3;
+
+        work[8*7] = x0; work[8*0] = x2; // already-consumed column slots are reused as scratch
+        x2 = x4 + x1; x4 -= x1;
+
+        x0 = work[8*1]; x3 = work[8*6];
+        x1 = x0 + x3; x0 -= x3;
+        work[8*4] = x0;
+
+        x0 = work[8*2]; x3 = work[8*5];
+        work[8*3] = x0 - x3; x0 += x3;
+
+        x3 = x0 + x1; x0 -= x1;
+        x1 = x2 + x3; x2 -= x3;
+
+        dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
+        dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);
+
+        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
+        x1 = x4 + x0; x4 -= x0;
+
+        dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
+        dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);
+
+        x0 = work[8*0]; x1 = work[8*3];
+        x2 = work[8*4]; x3 = work[8*7];
+
+        x0 += x1; x1 += x2; x2 += x3;
+        x1 = DCT_DESCALE(x1*C0_707, fixb);
+
+        x4 = x1 + x3; x3 -= x1;
+        x1 = (x0 - x2)*C0_382;
+        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
+        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
+
+        x1 = x0 + x3; x3 -= x0;
+        x0 = x4 + x2; x4 -= x2;
+
+        dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
+        dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
+        dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
+        dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
+    }
+}
+#endif
+
+void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
+{
+    // Writes one frame to strm as a JPEG: JFIF header, DQT, DHT, SOF0 and SOS segments, entropy-coded MCUs, EOI, then zero padding to a 4-byte boundary.
+    static bool init_cat_table = false; // NOTE(review): the lazy initialization below is unsynchronized -- confirm callers are single-threaded
+    const int CAT_TAB_SIZE = 4096;
+    static uchar cat_table[CAT_TAB_SIZE*2+1];
+    if( !init_cat_table )
+    {
+        for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
+        {
+            Cv32suf a;
+            a.f = (float)i;
+            cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0)); // biased float exponent - 126 == bit length of |i|; 0 for i == 0
+        }
+        init_cat_table = true;
+    }
+
+    //double total_dct = 0, total_cvt = 0;
+    CV_Assert( data && width > 0 && height > 0 );
+
+    // encode the header and tables
+    // for each mcu:
+    //   convert rgb to yuv with downsampling (if color).
+    //   for every block:
+    //     calc dct and quantize
+    //     encode block.
+    int x, y;
+    int i, j;
+    const int max_quality = 12;
+    short fdct_qtab[2][64];
+    unsigned huff_dc_tab[2][16];
+    unsigned huff_ac_tab[2][256];
+
+    int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale; // with chroma, an MCU spans 2x2 luma blocks
+    int  dc_pred[] = { 0, 0, 0 };
+    int  x_step = x_scale * 8;
+    int  y_step = y_scale * 8;
+    short  block[6][64];
+    short  buffer[4096];
+    int*   hbuffer = (int*)buffer; // buffer doubles as scratch for createSourceHuffmanTable
+    int  luma_count = x_scale*y_scale;
+    int  block_count = luma_count + channels - 1;
+    int  Y_step = x_scale*8;
+    const int UV_step = 16;
+    int u_plane_ofs = step*height;
+    int v_plane_ofs = u_plane_ofs + step*height;
+    double _quality = quality*0.01*max_quality;
+
+    if( _quality < 1. ) _quality = 1.;
+    if( _quality > max_quality ) _quality = max_quality;
+
+    double inv_quality = 1./_quality;
+
+    // Encode header
+    strm.putBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );
+
+    // Encode quantization tables
+    for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
+    {
+        const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
+        int chroma_scale = i > 0 ? luma_count : 1; // chroma samples are sums over the subsampled pixels, so that factor is folded into the quantizer
+
+        strm.jputShort( 0xffdb );   // DQT marker
+        strm.jputShort( 2 + 65*1 ); // put single qtable
+        strm.putByte( 0*16 + i );   // 8-bit table
+
+        // put coefficients
+        for( j = 0; j < 64; j++ )
+        {
+            int idx = zigzag[j];
+            int qval = cvRound(qtable[idx]*inv_quality);
+            if( qval < 1 )
+                qval = 1;
+            if( qval > 255 )
+                qval = 255;
+            fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
+                                (qval*chroma_scale*idct_prescale[idx])); // NOTE(review): cvRound wraps only the integer numerator, so this is a truncating integer division -- confirm intended
+            strm.putByte( qval );
+        }
+    }
+
+    // Encode huffman tables
+    for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
+    {
+        const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
+        i == 2 ? jpegTableK4 : jpegTableK6;
+        int is_ac_tab = i & 1;
+        int idx = i >= 2;
+        int tableSize = 16 + (is_ac_tab ? 162 : 12);
+
+        strm.jputShort( 0xFFC4 );      // DHT marker
+        strm.jputShort( 3 + tableSize ); // define one huffman table
+        strm.putByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
+        strm.putBytes( htable, tableSize ); // put table
+
+        BitStream::createEncodeHuffmanTable( BitStream::createSourceHuffmanTable(
+                                            htable, hbuffer, 16, 9 ), is_ac_tab ? huff_ac_tab[idx] :
+                                            huff_dc_tab[idx], is_ac_tab ? 256 : 16 );
+    }
+
+    // put frame header
+    strm.jputShort( 0xFFC0 );          // SOF0 marker
+    strm.jputShort( 8 + 3*channels );  // length of frame header
+    strm.putByte( 8 );               // sample precision
+    strm.jputShort( height );
+    strm.jputShort( width );
+    strm.putByte( channels );        // number of components
+
+    for( i = 0; i < channels; i++ )
+    {
+        strm.putByte( i + 1 );  // (i+1)-th component id (Y,U or V)
+        if( i == 0 )
+            strm.putByte(x_scale*16 + y_scale); // Y sampling factors: x_scale in the high nibble, y_scale in the low nibble
+        else
+            strm.putByte(1*16 + 1);
+        strm.putByte( i > 0 ); // quantization table idx
+    }
+
+    // put scan header
+    strm.jputShort( 0xFFDA );          // SOS marker
+    strm.jputShort( 6 + 2*channels );  // length of scan header
+    strm.putByte( channels );          // number of components in the scan
+
+    for( i = 0; i < channels; i++ )
+    {
+        strm.putByte( i+1 );             // component id
+        strm.putByte( (i>0)*16 + (i>0) );// selection of DC & AC tables
+    }
+
+    strm.jputShort(0*256 + 63); // start and end of spectral selection - for
+    // sequential DCT start is 0 and end is 63
+
+    strm.putByte( 0 );  // successive approximation bit position
+    // high & low - (0,0) for sequential DCT
+    unsigned currval = 0, code = 0, tempval = 0;
+    int bit_idx = 32;
+
+#define JPUT_BITS(val, bits) \
+    bit_idx -= (bits); \
+    tempval = (val) & bit_mask[(bits)]; \
+    if( bit_idx <= 0 ) \
+    {  \
+        strm.jput(currval | ((unsigned)tempval >> -bit_idx)); \
+        bit_idx += 32; \
+        currval = bit_idx < 32 ? (tempval << bit_idx) : 0; \
+    } \
+    else \
+        currval |= (tempval << bit_idx)
+
+#define JPUT_HUFF(val, table) \
+    code = table[(val) + 2]; \
+    JPUT_BITS(code >> 8, (int)(code & 255))
+
+    // encode data, one MCU (x_step x y_step pixels) at a time
+    for( y = 0; y < height; y += y_step, data += y_step*step )
+    {
+        for( x = 0; x < width; x += x_step )
+        {
+            int x_limit = x_step;
+            int y_limit = y_step;
+            const uchar* pix_data = data + x*input_channels;
+            short* Y_data = block[0];
+
+            if( x + x_limit > width ) x_limit = width - x;
+            if( y + y_limit > height ) y_limit = height - y;
+
+            memset( block, 0, block_count*64*sizeof(block[0][0]));
+
+            if( channels > 1 )
+            {
+                short* UV_data = block[luma_count];
+                // double t = (double)cv::getTickCount();
+
+                if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
+                {
+                    for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
+                    {
+#ifdef WITH_NEON
+                        {
+                            uint16x8_t masklo = vdupq_n_u16(255);
+                            uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
+                            uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
+                            lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
+                            uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
+                            t1 = vaddq_u16(t1, t2);
+                            vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
+
+                            lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
+                            t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
+                            lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
+                            t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
+                            t1 = vaddq_u16(t1, t2);
+                            vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
+                        }
+
+                        {
+                            int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
+                            int16x8_t delta = vdupq_n_s16(128);
+                            lane = vsubq_s16(lane, delta);
+                            vst1q_s16(Y_data, lane);
+
+                            lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
+                            lane = vsubq_s16(lane, delta);
+                            vst1q_s16(Y_data + 8, lane);
+
+                            lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
+                            lane = vsubq_s16(lane, delta);
+                            vst1q_s16(Y_data+Y_step, lane);
+
+                            lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
+                            lane = vsubq_s16(lane, delta);
+                            vst1q_s16(Y_data+Y_step + 8, lane);
+                        }
+#else
+                        for( j = 0; j < x_limit; j += 2, pix_data += 2 )
+                        {
+                            Y_data[j] = pix_data[0] - 128;
+                            Y_data[j+1] = pix_data[1] - 128;
+                            Y_data[j+Y_step] = pix_data[step] - 128;
+                            Y_data[j+Y_step+1] = pix_data[step+1] - 128;
+
+                            UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
+                                pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
+                            UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
+                                pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
+
+                        }
+
+                        pix_data -= x_limit*input_channels; // NOTE(review): the loop above advanced by x_limit bytes, so this rewind assumes input_channels == 1 here -- confirm
+#endif
+                    }
+                }
+                else
+                {
+                    for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
+                    {
+                        for( j = 0; j < x_limit; j++, pix_data += input_channels )
+                        {
+                            int Y, U, V;
+
+                            if( colorspace == COLORSPACE_BGR )
+                            {
+                                int r = pix_data[2];
+                                int g = pix_data[1];
+                                int b = pix_data[0];
+
+                                Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
+                                U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
+                                V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
+                            }
+                            else if( colorspace == COLORSPACE_RGBA )
+                            {
+                                int r = pix_data[0];
+                                int g = pix_data[1];
+                                int b = pix_data[2];
+
+                                Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
+                                U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
+                                V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
+                            }
+                            else
+                            {
+                                Y = pix_data[0] - 128;
+                                U = pix_data[v_plane_ofs] - 128; // NOTE(review): U comes from v_plane_ofs and V from u_plane_ofs (same pairing as the 16x16 path) -- confirm plane order
+                                V = pix_data[u_plane_ofs] - 128;
+                            }
+
+                            int j2 = j >> (x_scale - 1);
+                            Y_data[j] = (short)Y;
+                            UV_data[j2] = (short)(UV_data[j2] + U);
+                            UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
+                        }
+
+                        pix_data -= x_limit*input_channels;
+                        if( ((i+1) & (y_scale - 1)) == 0 ) // move to the next chroma row after every y_scale luma rows
+                        {
+                            UV_data += UV_step;
+                        }
+                    }
+                }
+
+                // total_cvt += (double)cv::getTickCount() - t;
+            }
+            else
+            {
+                for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
+                {
+                    for( j = 0; j < x_limit; j++ )
+                        Y_data[j] = (short)(pix_data[j]*4 - 128*4); // NOTE(review): gray samples are scaled by 4 while the colour paths store luma unscaled -- confirm intended
+                }
+            }
+
+            for( i = 0; i < block_count; i++ )
+            {
+                int is_chroma = i >= luma_count;
+                int src_step = x_scale * 8;
+                int run = 0, val;
+                const short* src_ptr = block[i & -2] + (i & 1)*8; // blocks come in side-by-side pairs: odd i starts 8 columns right of block[i-1]
+                const unsigned* htable = huff_ac_tab[is_chroma];
+
+                //double t = (double)cv::getTickCount();
+                aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
+                //total_dct += (double)cv::getTickCount() - t;
+
+                j = is_chroma + (i > luma_count); // DC predictor slot: 0 for luma, 1 and 2 for the two chroma blocks
+                val = buffer[0] - dc_pred[j];
+                dc_pred[j] = buffer[0];
+
+                {
+                    int cat = cat_table[val + CAT_TAB_SIZE];
+
+                    //CV_Assert( cat <= 11 );
+                    JPUT_HUFF( cat, huff_dc_tab[is_chroma] );
+                    JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
+                }
+
+                for( j = 1; j < 64; j++ )
+                {
+                    val = buffer[zigzag[j]];
+
+                    if( val == 0 )
+                    {
+                        run++;
+                    }
+                    else
+                    {
+                        while( run >= 16 )
+                        {
+                            JPUT_HUFF( 0xF0, htable ); // encode 16 zeros
+                            run -= 16;
+                        }
+
+                        {
+                            int cat = cat_table[val + CAT_TAB_SIZE];
+                            //CV_Assert( cat <= 10 );
+                            JPUT_HUFF( cat + run*16, htable );
+                            JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
+                        }
+
+                        run = 0;
+                    }
+                }
+
+                if( run )
+                {
+                    JPUT_HUFF( 0x00, htable ); // encode EOB
+                }
+            }
+        }
+    }
+
+    // Flush: fill the unused low bits of the pending word (presumably with 1s, given bit_mask) so it gets written out
+    JPUT_BITS((unsigned)-1, bit_idx & 31);
+    strm.jputShort( 0xFFD9 ); // EOI marker
+    /*printf("total dct = %.1fms, total cvt = %.1fms\n",
+     total_dct*1000./cv::getTickFrequency(),
+     total_cvt*1000./cv::getTickFrequency());*/
+    size_t pos = strm.getPos();
+    size_t pos1 = (pos + 3) & ~3; // next multiple of 4 at or after pos
+    for( ; pos < pos1; pos++ )
+        strm.putByte(0);
+}
+
+}
+
+Ptr<IVideoWriter> createMotionJpegWriter( const String& filename, double fps, Size frameSize, bool iscolor )
+{
+    Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);
+    if( !iwriter->isOpened() ) // the writer reports that it is not open
+        iwriter.release();     // drop it, so the Ptr returned below has been released
+    return iwriter;
+}
+
+}
diff --git a/modules/videoio/src/cap_pvapi.cpp b/modules/videoio/src/cap_pvapi.cpp
index 5c7e05e34..a6577fc78 100644
--- a/modules/videoio/src/cap_pvapi.cpp
+++ b/modules/videoio/src/cap_pvapi.cpp
@@ -60,6 +60,7 @@
 #ifdef WIN32
 #  include <io.h>
 #else
+#  include <time.h>
 #  include <unistd.h>
 #endif
 
@@ -106,18 +107,14 @@ protected:
     } tCamera;
 
     IplImage *frame;
-    IplImage *grayframe;
     tCamera  Camera;
     tPvErr   Errcode;
-    bool monocrome;
 };
 
 
 CvCaptureCAM_PvAPI::CvCaptureCAM_PvAPI()
 {
-    monocrome=false;
     frame = NULL;
-    grayframe = NULL;
     memset(&this->Camera, 0, sizeof(this->Camera));
 }
 
@@ -190,13 +187,6 @@ bool CvCaptureCAM_PvAPI::open( int index )
         tPvUint32 frameWidth, frameHeight;
         unsigned long maxSize;
 
-        // By Default, try to set the pixel format to Mono8.  This can be changed later
-        // via calls to setProperty. Some colour cameras (i.e. the Manta line) have a default
-        // image mode of Bayer8, which is currently unsupported, so Mono8 is a safe bet for
-        // startup.
-
-        monocrome = (PvAttrEnumSet(Camera.Handle, "PixelFormat", "Mono8") == ePvErrSuccess);
-
         PvAttrUint32Get(Camera.Handle, "Width", &frameWidth);
         PvAttrUint32Get(Camera.Handle, "Height", &frameHeight);
 
@@ -229,15 +219,9 @@ bool CvCaptureCAM_PvAPI::grabFrame()
 
 IplImage* CvCaptureCAM_PvAPI::retrieveFrame(int)
 {
-
     if (PvCaptureWaitForFrameDone(Camera.Handle, &(Camera.Frame), 1000) == ePvErrSuccess)
     {
-        if (!monocrome)
-        {
-            cvMerge(grayframe,grayframe,grayframe,NULL,frame);
-            return frame;
-        }
-        return grayframe;
+        return frame;
     }
     else return NULL;
 }
@@ -254,11 +238,6 @@ double CvCaptureCAM_PvAPI::getProperty( int property_id ) const
     case CV_CAP_PROP_FRAME_HEIGHT:
         PvAttrUint32Get(Camera.Handle, "Height", &nTemp);
         return (double)nTemp;
-    case CV_CAP_PROP_MONOCROME:
-        if (monocrome)
-          return 1;
-        else
-          return 0;
     case CV_CAP_PROP_EXPOSURE:
         PvAttrUint32Get(Camera.Handle,"ExposureValue",&nTemp);
         return (double)nTemp;
@@ -312,6 +291,25 @@ double CvCaptureCAM_PvAPI::getProperty( int property_id ) const
     case CV_CAP_PROP_PVAPI_BINNINGY:
         PvAttrUint32Get(Camera.Handle,"BinningY",&nTemp);
         return (double)nTemp;
+    case CV_CAP_PROP_PVAPI_PIXELFORMAT:
+        char pixelFormat[256];
+        PvAttrEnumGet(Camera.Handle, "PixelFormat", pixelFormat,256,NULL);
+        if (strcmp(pixelFormat, "Mono8")==0)
+            return 1.0;
+        else if (strcmp(pixelFormat, "Mono16")==0)
+            return 2.0;
+        else if (strcmp(pixelFormat, "Bayer8")==0)
+            return 3.0;
+        else if (strcmp(pixelFormat, "Bayer16")==0)
+            return 4.0;
+        else if (strcmp(pixelFormat, "Rgb24")==0)
+            return 5.0;
+        else if (strcmp(pixelFormat, "Bgr24")==0)
+            return 6.0;
+        else if (strcmp(pixelFormat, "Rgba32")==0)
+            return 7.0;
+        else if (strcmp(pixelFormat, "Bgra32")==0)
+            return 8.0;
     }
     return -1.0;
 }
@@ -359,21 +357,6 @@ bool CvCaptureCAM_PvAPI::setProperty( int property_id, double value )
 
         break;
     }
-    case CV_CAP_PROP_MONOCROME:
-        if (value==1)
-        {
-            char pixelFormat[256];
-            PvAttrEnumGet(Camera.Handle, "PixelFormat", pixelFormat,256,NULL);
-            if ((strcmp(pixelFormat, "Mono8")==0) || strcmp(pixelFormat, "Mono16")==0)
-            {
-                monocrome=true;
-            }
-            else
-                return false;
-        }
-        else
-            monocrome=false;
-        break;
     case CV_CAP_PROP_EXPOSURE:
         if ((PvAttrUint32Set(Camera.Handle,"ExposureValue",(tPvUint32)value)==ePvErrSuccess))
             break;
@@ -449,6 +432,51 @@ bool CvCaptureCAM_PvAPI::setProperty( int property_id, double value )
             break;
         else
             return false;
+    case CV_CAP_PROP_PVAPI_PIXELFORMAT: // value 1..8 selects the "PixelFormat" enum string; any other value is rejected
+        {
+            cv::String pixelFormat;
+
+            if (value==1)
+                pixelFormat = "Mono8";
+            else if (value==2)
+                pixelFormat = "Mono16";
+            else if (value==3)
+                pixelFormat = "Bayer8";
+            else if (value==4)
+                pixelFormat = "Bayer16";
+            else if (value==5)
+                pixelFormat = "Rgb24";
+            else if (value==6)
+                pixelFormat = "Bgr24";
+            else if (value==7)
+                pixelFormat = "Rgba32";
+            else if (value==8)
+                pixelFormat = "Bgra32";
+            else
+                return false;
+
+            if ((PvAttrEnumSet(Camera.Handle,"PixelFormat", pixelFormat.c_str())==ePvErrSuccess))
+            {
+                tPvUint32 currWidth;
+                tPvUint32 currHeight;
+
+                PvAttrUint32Get(Camera.Handle, "Width", &currWidth);
+                PvAttrUint32Get(Camera.Handle, "Height", &currHeight);
+
+                stopCapture();
+                // Reallocate Frames
+                if (!resizeCaptureFrame(currWidth, currHeight)) // rebuild the frame buffer at the current width/height
+                {
+                    startCapture(); // capture is restarted even though reallocation failed
+                    return false;
+                }
+
+                startCapture();
+                return true;
+            }
+            else
+                return false; // the camera rejected the requested pixel format
+        }
     default:
         return false;
     }
@@ -495,13 +523,6 @@ bool CvCaptureCAM_PvAPI::resizeCaptureFrame (int frameWidth, int frameHeight)
     tPvUint32 sensorHeight;
     tPvUint32 sensorWidth;
 
-
-    if (grayframe)
-    {
-        cvReleaseImage(&grayframe);
-        grayframe = NULL;
-    }
-
     if (frame)
     {
         cvReleaseImage(&frame);
@@ -544,28 +565,31 @@ bool CvCaptureCAM_PvAPI::resizeCaptureFrame (int frameWidth, int frameHeight)
     PvAttrUint32Get(Camera.Handle, "TotalBytesPerFrame", &frameSize);
 
 
-    if (strcmp(pixelFormat, "Mono8")==0)
+    if ( (strcmp(pixelFormat, "Mono8")==0) || (strcmp(pixelFormat, "Bayer8")==0) )
+    {
+        frame = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_8U, 1);
+        frame->widthStep = (int)frameWidth;
+        Camera.Frame.ImageBufferSize = frameSize;
+        Camera.Frame.ImageBuffer = frame->imageData;
+    }
+    else if ( (strcmp(pixelFormat, "Mono16")==0) || (strcmp(pixelFormat, "Bayer16")==0) )
+    {
+        frame = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_16U, 1);
+        frame->widthStep = (int)frameWidth*2;
+        Camera.Frame.ImageBufferSize = frameSize;
+        Camera.Frame.ImageBuffer = frame->imageData;
+    }
+    else if ( (strcmp(pixelFormat, "Rgb24")==0) || (strcmp(pixelFormat, "Bgr24")==0) )
     {
-        grayframe = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_8U, 1);
-        grayframe->widthStep = (int)frameWidth;
         frame = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_8U, 3);
         frame->widthStep = (int)frameWidth*3;
         Camera.Frame.ImageBufferSize = frameSize;
-        Camera.Frame.ImageBuffer = grayframe->imageData;
+        Camera.Frame.ImageBuffer = frame->imageData;
     }
-    else if (strcmp(pixelFormat, "Mono16")==0)
+    else if ( (strcmp(pixelFormat, "Rgba32")==0) || (strcmp(pixelFormat, "Bgra32")==0) )
     {
-        grayframe = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_16U, 1);
-        grayframe->widthStep = (int)frameWidth;
-        frame = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_16U, 3);
-        frame->widthStep = (int)frameWidth*3;
-        Camera.Frame.ImageBufferSize = frameSize;
-        Camera.Frame.ImageBuffer = grayframe->imageData;
-    }
-    else if (strcmp(pixelFormat, "Bgr24")==0)
-    {
-        frame = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_8U, 3);
-        frame->widthStep = (int)frameWidth*3;
+        frame = cvCreateImage(cvSize((int)frameWidth, (int)frameHeight), IPL_DEPTH_8U, 4);
+        frame->widthStep = (int)frameWidth*4;
         Camera.Frame.ImageBufferSize = frameSize;
         Camera.Frame.ImageBuffer = frame->imageData;
     }
diff --git a/modules/videoio/src/cap_ximea.cpp b/modules/videoio/src/cap_ximea.cpp
index 8356b4d92..ccf49e45b 100644
--- a/modules/videoio/src/cap_ximea.cpp
+++ b/modules/videoio/src/cap_ximea.cpp
@@ -52,7 +52,15 @@ CvCapture* cvCreateCameraCapture_XIMEA( int index )
 // Enumerate connected devices
 void CvCaptureCAM_XIMEA::init()
 {
+#if defined WIN32 || defined _WIN32
     xiGetNumberDevices( &numDevices);
+#else
+    // try second re-enumeration if first one fails
+    if (xiGetNumberDevices( &numDevices) != XI_OK)
+    {
+        xiGetNumberDevices( &numDevices);
+    }
+#endif
     hmv = NULL;
     frame = NULL;
     timeout = 0;
@@ -73,8 +81,17 @@ bool CvCaptureCAM_XIMEA::open( int wIndex )
 
     if((mvret = xiOpenDevice( wIndex, &hmv)) != XI_OK)
     {
+#if defined WIN32 || defined _WIN32
         errMsg("Open XI_DEVICE failed", mvret);
         return false;
+#else
+        // try opening second time if first fails
+        if((mvret = xiOpenDevice( wIndex, &hmv))  != XI_OK)
+        {
+            errMsg("Open XI_DEVICE failed", mvret);
+            return false;
+        }
+#endif
     }
 
     int width   = 0;
@@ -260,7 +277,7 @@ double CvCaptureCAM_XIMEA::getProperty( int property_id ) const
     case CV_CAP_PROP_XI_AUTO_WB       : xiGetParamInt( hmv, XI_PRM_AUTO_WB, &ival); return ival;
     case CV_CAP_PROP_XI_AEAG          : xiGetParamInt( hmv, XI_PRM_AEAG, &ival); return ival;
     case CV_CAP_PROP_XI_EXP_PRIORITY  : xiGetParamFloat( hmv, XI_PRM_EXP_PRIORITY, &fval); return fval;
-    case CV_CAP_PROP_XI_AE_MAX_LIMIT  : xiGetParamInt( hmv, XI_PRM_AE_MAX_LIMIT, &ival); return ival;
+    case CV_CAP_PROP_XI_AE_MAX_LIMIT  : xiGetParamInt( hmv, XI_PRM_AE_MAX_LIMIT, &ival); return ival; // must read AE_MAX_LIMIT; EXP_PRIORITY has its own case above
     case CV_CAP_PROP_XI_AG_MAX_LIMIT  : xiGetParamFloat( hmv, XI_PRM_AG_MAX_LIMIT, &fval); return fval;
     case CV_CAP_PROP_XI_AEAG_LEVEL    : xiGetParamInt( hmv, XI_PRM_AEAG_LEVEL, &ival); return ival;
     case CV_CAP_PROP_XI_TIMEOUT       : return timeout;
@@ -293,7 +310,7 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value )
     case CV_CAP_PROP_XI_OFFSET_Y      : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_Y, ival); break;
     case CV_CAP_PROP_XI_TRG_SOURCE    : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOURCE, ival); break;
     case CV_CAP_PROP_XI_GPI_SELECTOR  : mvret = xiSetParamInt( hmv, XI_PRM_GPI_SELECTOR, ival); break;
-    case CV_CAP_PROP_XI_TRG_SOFTWARE  : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOFTWARE, 1); break;
+    case CV_CAP_PROP_XI_TRG_SOFTWARE  : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOFTWARE, 1); break; // writes 1 to TRG_SOFTWARE; TRG_SOURCE has its own case above and must not be overwritten here
     case CV_CAP_PROP_XI_GPI_MODE      : mvret = xiSetParamInt( hmv, XI_PRM_GPI_MODE, ival); break;
     case CV_CAP_PROP_XI_GPI_LEVEL     : mvret = xiSetParamInt( hmv, XI_PRM_GPI_LEVEL, ival); break;
     case CV_CAP_PROP_XI_GPO_SELECTOR  : mvret = xiSetParamInt( hmv, XI_PRM_GPO_SELECTOR, ival); break;
@@ -301,10 +318,10 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value )
     case CV_CAP_PROP_XI_LED_SELECTOR  : mvret = xiSetParamInt( hmv, XI_PRM_LED_SELECTOR, ival); break;
     case CV_CAP_PROP_XI_LED_MODE      : mvret = xiSetParamInt( hmv, XI_PRM_LED_MODE, ival); break;
     case CV_CAP_PROP_XI_AUTO_WB       : mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, ival); break;
-    case CV_CAP_PROP_XI_MANUAL_WB     : mvret = xiSetParamInt( hmv, XI_PRM_MANUAL_WB, ival); break;
+    case CV_CAP_PROP_XI_MANUAL_WB     : mvret = xiSetParamInt( hmv, XI_PRM_MANUAL_WB, ival); break; // must write MANUAL_WB; LED_MODE has its own case above
     case CV_CAP_PROP_XI_AEAG          : mvret = xiSetParamInt( hmv, XI_PRM_AEAG, ival); break;
     case CV_CAP_PROP_XI_EXP_PRIORITY  : mvret = xiSetParamFloat( hmv, XI_PRM_EXP_PRIORITY, fval); break;
-    case CV_CAP_PROP_XI_AE_MAX_LIMIT  : mvret = xiSetParamInt( hmv, XI_PRM_AE_MAX_LIMIT, ival); break;
+    case CV_CAP_PROP_XI_AE_MAX_LIMIT  : mvret = xiSetParamInt( hmv, XI_PRM_AE_MAX_LIMIT, ival); break; // must write AE_MAX_LIMIT; EXP_PRIORITY is the float case above
     case CV_CAP_PROP_XI_AG_MAX_LIMIT  : mvret = xiSetParamFloat( hmv, XI_PRM_AG_MAX_LIMIT, fval); break;
     case CV_CAP_PROP_XI_AEAG_LEVEL    : mvret = xiSetParamInt( hmv, XI_PRM_AEAG_LEVEL, ival); break;
     case CV_CAP_PROP_XI_TIMEOUT       : timeout = ival; break;
diff --git a/modules/videoio/src/precomp.hpp b/modules/videoio/src/precomp.hpp
index c399d72b1..c4662cccc 100644
--- a/modules/videoio/src/precomp.hpp
+++ b/modules/videoio/src/precomp.hpp
@@ -166,11 +166,26 @@ namespace cv
     public:
         virtual ~IVideoCapture() {}
         virtual double getProperty(int) const { return 0; }
-        virtual bool setProperty(int, double) { return 0; }
+        virtual bool setProperty(int, double) { return false; }
         virtual bool grabFrame() = 0;
-        virtual bool retrieveFrame(int, cv::OutputArray) = 0;
+        virtual bool retrieveFrame(int, OutputArray) = 0;
+        virtual bool isOpened() const = 0;
         virtual int getCaptureDomain() { return CAP_ANY; } // Return the type of the capture object: CAP_VFW, etc...
     };
+
+    class IVideoWriter // abstract writer interface: isOpened() and write() are pure virtual
+    {
+    public:
+        virtual ~IVideoWriter() {}
+        virtual double getProperty(int) const { return 0; } // default implementation: always returns 0
+        virtual bool setProperty(int, double) { return false; } // default implementation: always returns false
+
+        virtual bool isOpened() const = 0;
+        virtual void write(InputArray) = 0;
+    };
+
+    Ptr<IVideoCapture> createMotionJpegCapture(const String& filename);
+    Ptr<IVideoWriter> createMotionJpegWriter( const String& filename, double fps, Size frameSize, bool iscolor );
 };
 
 #endif /* __VIDEOIO_H_ */
diff --git a/platforms/android/README.android b/platforms/android/README.android
index dd870b28e..564fc3a0f 100644
--- a/platforms/android/README.android
+++ b/platforms/android/README.android
@@ -1 +1 @@
-See http://opencv.org/android
+See http://opencv.org/platforms/android.html
diff --git a/platforms/android/libinfo/CMakeLists.txt b/platforms/android/libinfo/CMakeLists.txt
index 55dd27859..eea43621d 100644
--- a/platforms/android/libinfo/CMakeLists.txt
+++ b/platforms/android/libinfo/CMakeLists.txt
@@ -23,6 +23,9 @@ if(NOT ANDROID_PACKAGE_PLATFORM)
   endif()
 endif()
 
+string(REPLACE "-fvisibility=hidden" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") # remove -fvisibility=hidden from the C flags in this directory
+string(REPLACE "-fvisibility-inlines-hidden" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") # NOTE(review): this is a C++-only option; presumably stripped defensively - confirm
+
 add_definitions(-DANDROID_PACKAGE_RELEASE=${ANDROID_PACKAGE_RELEASE} -DANDROID_PACKAGE_PLATFORM="${ANDROID_PACKAGE_PLATFORM}")
 
 include_directories(jni/BinderComponent jni/include "${OpenCV_SOURCE_DIR}/modules/core/include")
diff --git a/platforms/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml
index 3ada75d0a..4f78c314a 100644
--- a/platforms/android/service/engine/AndroidManifest.xml
+++ b/platforms/android/service/engine/AndroidManifest.xml
@@ -1,8 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
     package="org.opencv.engine"
-    android:versionCode="218@ANDROID_PLATFORM_VERSION_CODE@"
-    android:versionName="2.18" >
+    android:versionCode="300@ANDROID_PLATFORM_VERSION_CODE@"
+    android:versionName="3.00" >
 
     <uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
     <uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
@@ -27,4 +27,4 @@
         </intent-filter>
     </activity>
     </application>
-</manifest>
\ No newline at end of file
+</manifest>
diff --git a/platforms/android/service/engine/CMakeLists.txt b/platforms/android/service/engine/CMakeLists.txt
index b1cac9383..b09e1fa05 100644
--- a/platforms/android/service/engine/CMakeLists.txt
+++ b/platforms/android/service/engine/CMakeLists.txt
@@ -12,14 +12,16 @@ if(ARMEABI_V7A)
   else()
     set(ANDROID_PLATFORM_VERSION_CODE "3")
   endif()
+elseif(ARM64_V8A)
+  set(ANDROID_PLATFORM_VERSION_CODE "4") # arm64-v8a takes code 4, which X86 used before this change
 elseif(ARMEABI_V6)
   set(ANDROID_PLATFORM_VERSION_CODE "1")
 elseif(ARMEABI)
   set(ANDROID_PLATFORM_VERSION_CODE "1")
 elseif(X86)
-  set(ANDROID_PLATFORM_VERSION_CODE "4")
-elseif(MIPS)
   set(ANDROID_PLATFORM_VERSION_CODE "5")
+elseif(MIPS)
+  set(ANDROID_PLATFORM_VERSION_CODE "6") # MIPS moves from 5 to 6; X86 now uses 5
 else()
   message(WARNING "Can not automatically determine the value for ANDROID_PLATFORM_VERSION_CODE")
 endif()
@@ -47,6 +49,8 @@ endif()
 add_definitions(-DPLATFORM_ANDROID -D__SUPPORT_ARMEABI_V7A_FEATURES -D__SUPPORT_TEGRA3 -D__SUPPORT_MIPS)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -fno-exceptions")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-allow-shlib-undefined")
+string(REPLACE "-fvisibility=hidden" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # remove -fvisibility=hidden from the C++ flags in this directory
+string(REPLACE "-fvisibility-inlines-hidden" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # likewise for hidden inline visibility
 
 include_directories("jni/BinderComponent" "jni/include")
 include_directories(SYSTEM "${ANDROID_SOURCE_TREE}/frameworks/base/include" "${ANDROID_SOURCE_TREE}/system/core/include")
diff --git a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
index bb0a34cca..458584034 100644
--- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp
@@ -39,44 +39,63 @@ int GetCpuID()
 #else
     result = ARCH_UNKNOWN;
 #endif
+#elif defined(__aarch64__)
+#ifdef __SUPPORT_AARCH64
+    result |= ARCH_AARCH64;
 #else
+    result = ARCH_UNKNOWN;
+#endif
+#elif defined(__arm__)
     LOGD("Using ARM HW detector");
     it = cpu_info.find("Processor");
 
     if (cpu_info.end() != it)
     {
-        size_t proc_name_pos = it->second.find(CPU_INFO_ARCH_X86_STR);
+        size_t proc_name_pos = it->second.find(CPU_INFO_ARCH_ARMV7_STR);
         if (string::npos != proc_name_pos)
         {
+            result |= ARCH_ARMv7;
         }
         else
         {
-            proc_name_pos = it->second.find(CPU_INFO_ARCH_ARMV7_STR);
+            proc_name_pos = it->second.find(CPU_INFO_ARCH_ARMV6_STR);
             if (string::npos != proc_name_pos)
             {
-                result |= ARCH_ARMv7;
+                result |= ARCH_ARMv6;
             }
             else
             {
-                proc_name_pos = it->second.find(CPU_INFO_ARCH_ARMV6_STR);
+                proc_name_pos = it->second.find(CPU_INFO_ARCH_ARMV5_STR);
                 if (string::npos != proc_name_pos)
                 {
-                    result |= ARCH_ARMv6;
+                    result |= ARCH_ARMv5;
                 }
                 else
                 {
-                    proc_name_pos = it->second.find(CPU_INFO_ARCH_ARMV5_STR);
-                    if (string::npos != proc_name_pos)
-                    {
-                        result |= ARCH_ARMv5;
-                    }
+                    // Treat the arch of current binary. Google Play checks
+                    // device hardware before installation. Let's assume that
+                    // if the binary works, it's compatible with current hardware
+#if defined __ARM_ARCH_7A__
+                    result |= ARCH_ARMv7;
+                    result |= FEATURES_HAS_VFPv3d16;
+#else
+                    result |= ARCH_ARMv5;
+#endif
                 }
             }
         }
     }
     else
     {
-        return ARCH_UNKNOWN;
+        // No "Processor" entry in cpu_info: treat the arch of current binary.
+        // Google Play checks device hardware before installation, so assume
+        // that if the binary works, it's compatible with current hardware
+#if defined __ARM_ARCH_7A__
+        result |= ARCH_ARMv7;
+        result |= FEATURES_HAS_VFPv3d16; // same assumption as the unrecognised-"Processor" fallback above; only the d16 variant is assumed
+#else
+        result |= ARCH_ARMv5;
+#endif
     }
 
     it = cpu_info.find("Features");
@@ -107,7 +126,9 @@ int GetCpuID()
             }
         }
     }
-    #endif
+#else
+    result = ARCH_UNKNOWN;
+#endif
 
     return result;
 }
diff --git a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
index 1e14ba701..0b8b9aabe 100644
--- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
+++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h
@@ -9,7 +9,7 @@
 #define ARCH_ARMv5 67108864L
 #define ARCH_ARMv6 134217728L
 #define ARCH_ARMv7 268435456L
-#define ARCH_ARMv8 536870912L
+#define ARCH_AARCH64 536870912L
 #define ARCH_MIPS 1073741824L
 
 #define FEATURES_HAS_VFPv3d16 1L
diff --git a/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
index 2b113b4e2..c83f80548 100644
--- a/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
+++ b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp
@@ -15,7 +15,7 @@ using namespace android;
 
 const int OpenCVEngine::Platform = DetectKnownPlatforms();
 const int OpenCVEngine::CpuID = GetCpuID();
-const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400, 2040500, 2040600, 2040700, 2040701, 2040800, 2040900};
+const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400, 2040500, 2040600, 2040700, 2040701, 2040800, 2040900, 2041000, 2041100, 3000000};
 
 bool OpenCVEngine::ValidateVersion(int version)
 {
diff --git a/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp
index 5c5022ff4..69a36793b 100644
--- a/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp
+++ b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp
@@ -208,6 +208,11 @@ std::vector<std::pair<int, int> > CommonPackageManager::InitArmRating()
     result.push_back(std::pair<int, int>(PLATFORM_TEGRA4,  ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON));
     result.push_back(std::pair<int, int>(PLATFORM_TEGRA5,  ARCH_ARMv7 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON));
 
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_AARCH64)); // AArch64 entries, appended after the Tegra ones
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_AARCH64 | FEATURES_HAS_VFPv3));
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_AARCH64 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4));
+    result.push_back(std::pair<int, int>(PLATFORM_UNKNOWN, ARCH_AARCH64 | FEATURES_HAS_VFPv3 | FEATURES_HAS_VFPv4 | FEATURES_HAS_NEON)); // VFPv3 + VFPv4 + NEON, matching the Tegra4/5 rows
+
     return result;
 }
 
diff --git a/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp
index ca364b444..d831bf7a5 100644
--- a/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp
+++ b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp
@@ -302,20 +302,13 @@ PackageInfo::PackageInfo(int version, int platform, int cpu_id, std::string inst
                 }
                 #endif
             }
-            else if (ARCH_ARMv8 & CpuID)
+            #ifdef __SUPPORT_AARCH64
+            else if (ARCH_AARCH64 & CpuID)
             {
-                LOGD("PackageInfo::PackageInfo: package arch ARMv8");
-                #ifdef __SUPPORT_ARMEABI_V8
-                FullName += string("_") + ARCH_ARMv8_NAME;
-                #else
-                FullName += string("_") + ARCH_ARMv7_NAME;
-                #endif
-                //string features = JoinARMFeatures(CpuID);
-                //if (!features.empty())
-                //{
-                    //    FullName += string("_") + features;
-                //}
+                LOGD("PackageInfo::PackageInfo: package arch AARCH64");
+                FullName += string("_") + ARCH_AARCH64_NAME;
             }
+            #endif
             #ifdef __SUPPORT_MIPS
             else if (ARCH_MIPS & CpuID)
             {
@@ -460,14 +453,22 @@ InstallPath(install_path)
             {
                 CpuID = ARCH_ARMv7 | SplitARMFeatures(features);
             }
+            #ifdef __SUPPORT_AARCH64
+            else if (ARCH_AARCH64_NAME == features[2])
+            {
+                CpuID = ARCH_AARCH64 | SplitARMFeatures(features);
+            }
+            #endif
             else if (ARCH_X86_NAME == features[2])
             {
                 CpuID = ARCH_X86 | SplitIntelFeatures(features);
             }
+            #ifdef __SUPPORT_INTEL_x64
             else if (ARCH_X64_NAME == features[2])
             {
                 CpuID = ARCH_X64 | SplitIntelFeatures(features);
             }
+            #endif
             #ifdef __SUPPORT_MIPS
             else if (ARCH_MIPS_NAME == features[2])
             {
diff --git a/platforms/android/service/engine/jni/NativeService/PackageInfo.h b/platforms/android/service/engine/jni/NativeService/PackageInfo.h
index f94f0f382..7e31e16a1 100644
--- a/platforms/android/service/engine/jni/NativeService/PackageInfo.h
+++ b/platforms/android/service/engine/jni/NativeService/PackageInfo.h
@@ -10,7 +10,7 @@
 #define ARCH_ARMv5_NAME "armv5"
 #define ARCH_ARMv6_NAME "armv6"
 #define ARCH_ARMv7_NAME "armv7a"
-#define ARCH_ARMv8_NAME "armv8"
+#define ARCH_AARCH64_NAME "aarch64"
 
 #define FEATURES_HAS_VFPv3d16_NAME "vfpv3d16"
 #define FEATURES_HAS_VFPv3_NAME "vfpv3"
diff --git a/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp b/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp
index 8e7dfab00..fd276e9f9 100644
--- a/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp
+++ b/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp
@@ -146,11 +146,19 @@ TEST(CpuID, CheckMips)
     EXPECT_TRUE(cpu_id & ARCH_MIPS);
 }
 #endif
+#elif defined(__aarch64__)
+TEST(CpuID, CheckAarch64)
+{
+    int cpu_id = GetCpuID();
+    EXPECT_TRUE(cpu_id & ARCH_AARCH64);
+}
 #else
+# if defined(__arm__) && defined(USE_TEGRA_HW_DETECTOR)
 TEST(TegraDetector, Detect)
 {
     EXPECT_TRUE(DetectTegra() != 0);
 }
+# endif
 
 TEST(CpuID, CheckArmV7)
 {
diff --git a/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp b/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp
index de6b22453..253607ca8 100644
--- a/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp
+++ b/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp
@@ -52,6 +52,16 @@ TEST(PackageInfo, FullNameArmv7VFPv3Neon)
 #endif
 }
 
+// PackageInfo only appends the aarch64 suffix when built with __SUPPORT_AARCH64.
+#ifdef __SUPPORT_AARCH64
+TEST(PackageInfo, FullNameAarch64)
+{
+    PackageInfo info(2041000, PLATFORM_UNKNOWN, ARCH_AARCH64);
+    string name = info.GetFullName();
+    EXPECT_STREQ("org.opencv.lib_v24_aarch64", name.c_str());
+}
+#endif
+
 TEST(PackageInfo, FullNameArmv5)
 {
     PackageInfo info(2030000, PLATFORM_UNKNOWN, ARCH_ARMv5);
diff --git a/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp b/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp
index 14295ecbc..33af96156 100644
--- a/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp
+++ b/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp
@@ -54,6 +54,16 @@ TEST(PackageManager, GetPackagePathForArmv7)
     EXPECT_STREQ("/data/data/org.opencv.lib_v23_armv7a/lib", path.c_str());
 }
 
+#ifdef __SUPPORT_AARCH64
+TEST(PackageManager, GetPackagePathForAarch64)
+{
+    PackageManagerStub pm;
+    EXPECT_TRUE(pm.InstallVersion(2041100, PLATFORM_UNKNOWN, ARCH_AARCH64));
+    string path = pm.GetPackagePathByVersion(2041100, PLATFORM_UNKNOWN, ARCH_AARCH64);
+    EXPECT_STREQ("/data/data/org.opencv.lib_v24_aarch64/lib", path.c_str());
+}
+#endif
+
 TEST(PackageManager, GetPackagePathForArmv7Neon)
 {
     PackageManagerStub pm;
diff --git a/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java
index 1d52f0cf0..f115070aa 100644
--- a/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java
+++ b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java
@@ -10,7 +10,7 @@ public class HardwareDetector
     public static final int ARCH_ARMv5    = 0x04000000;
     public static final int ARCH_ARMv6    = 0x08000000;
     public static final int ARCH_ARMv7    = 0x10000000;
-    public static final int ARCH_ARMv8    = 0x20000000;
+    public static final int ARCH_AARCH64  = 0x20000000;
 
     public static final int ARCH_MIPS     = 0x40000000;
     // Platform specific features
diff --git a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
index 0cd2fd58e..b4e0be5a9 100644
--- a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
+++ b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java
@@ -7,6 +7,7 @@ import java.util.StringTokenizer;
 import org.opencv.engine.HardwareDetector;
 import org.opencv.engine.MarketConnector;
 import org.opencv.engine.OpenCVEngineInterface;
+import org.opencv.engine.OpenCVEngineService;
 import org.opencv.engine.OpenCVLibraryInfo;
 import org.opencv.engine.R;
 import android.annotation.TargetApi;
@@ -140,11 +141,11 @@ public class ManagerActivity extends Activity
             }
             else if ((CpuId & HardwareDetector.ARCH_ARMv7) == HardwareDetector.ARCH_ARMv7)
             {
-                HardwarePlatformView.setText("ARM v7 " + JoinArmFeatures(CpuId));
+                HardwarePlatformView.setText("ARM v7a " + JoinArmFeatures(CpuId));
             }
-            else if ((CpuId & HardwareDetector.ARCH_ARMv8) == HardwareDetector.ARCH_ARMv8)
+            else if ((CpuId & HardwareDetector.ARCH_AARCH64) == HardwareDetector.ARCH_AARCH64)
             {
-                HardwarePlatformView.setText("ARM v8 " + JoinArmFeatures(CpuId));
+                HardwarePlatformView.setText("AARCH64 (ARM64 v8a) " + JoinArmFeatures(CpuId));
             }
             else if ((CpuId & HardwareDetector.ARCH_MIPS) == HardwareDetector.ARCH_MIPS)
             {
@@ -220,7 +221,8 @@ public class ManagerActivity extends Activity
             public void onReceive(Context context, Intent intent) {
                 Log.d("OpenCVManager/Receiver", "Broadcast message " + intent.getAction() + " receiver");
                 Log.d("OpenCVManager/Receiver", "Filling package list on broadcast message");
-                if (!bindService(new Intent("org.opencv.engine.BIND"), new OpenCVEngineServiceConnection(), Context.BIND_AUTO_CREATE))
+                if (!bindService(new Intent(ManagerActivity.this, OpenCVEngineService.class), // explicit intent, as in the on-resume bind
+                     new OpenCVEngineServiceConnection(), Context.BIND_AUTO_CREATE))
                 {
                     TextView EngineVersionView = (TextView)findViewById(R.id.EngineVersionValue);
                     EngineVersionView.setText("not avaliable");
@@ -251,7 +253,7 @@ public class ManagerActivity extends Activity
         if (HardwareDetector.mIsReady) {
             Log.d(TAG, "Filling package list on resume");
             OpenCVEngineServiceConnection connection = new OpenCVEngineServiceConnection();
-            if (!bindService(new Intent("org.opencv.engine.BIND"), connection, Context.BIND_AUTO_CREATE)) {
+            if (!bindService(new Intent(this, OpenCVEngineService.class), connection, Context.BIND_AUTO_CREATE)) {
                 Log.e(TAG, "Cannot bind to OpenCV Manager service!");
                 TextView EngineVersionView = (TextView)findViewById(R.id.EngineVersionValue);
                 if (EngineVersionView != null)
@@ -304,6 +306,9 @@ public class ManagerActivity extends Activity
                 path = EngineService.getLibPathByVersion("2.5");
                 Log.d(TAG, "2.5 -> " + path);
                 mActivePackageMap.put("25", path);
+                path = EngineService.getLibPathByVersion("3.0");
+                Log.d(TAG, "3.0 -> " + path);
+                mActivePackageMap.put("30", path);
             } catch (RemoteException e) {
                 // TODO Auto-generated catch block
                 e.printStackTrace();
diff --git a/platforms/winrt/setup_winrt.ps1 b/platforms/winrt/setup_winrt.ps1
index b7ef93dcd..ddd82864d 100644
--- a/platforms/winrt/setup_winrt.ps1
+++ b/platforms/winrt/setup_winrt.ps1
@@ -1,17 +1,29 @@
-<#
-Copyright � Microsoft Open Technologies, Inc.
-All Rights Reserved
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
+﻿<#
+Copyright (c) Microsoft Open Technologies, Inc.
+All rights reserved.
 
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
+(3-clause BSD License)
 
-THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
-EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE,
-FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that
+the following conditions are met:
 
-See the Apache 2 License for the specific language governing permissions and limitations under the License.
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
+following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
+following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or
+promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
 #>
 
 [CmdletBinding()]
diff --git a/samples/android/15-puzzle/src/org/opencv/samples/puzzle15/Puzzle15Activity.java b/samples/android/15-puzzle/src/org/opencv/samples/puzzle15/Puzzle15Activity.java
index b59da5975..71f59fa13 100644
--- a/samples/android/15-puzzle/src/org/opencv/samples/puzzle15/Puzzle15Activity.java
+++ b/samples/android/15-puzzle/src/org/opencv/samples/puzzle15/Puzzle15Activity.java
@@ -78,7 +78,7 @@ public class Puzzle15Activity extends Activity implements CvCameraViewListener,
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java
index 058497303..aa9050405 100644
--- a/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java
+++ b/samples/android/camera-calibration/src/org/opencv/samples/cameracalibration/CameraCalibrationActivity.java
@@ -94,7 +94,7 @@ public class CameraCalibrationActivity extends Activity implements CvCameraViewL
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_2, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java b/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java
index 82f6a68ac..97d348c9d 100644
--- a/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java
+++ b/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetectionActivity.java
@@ -90,7 +90,7 @@ public class ColorBlobDetectionActivity extends Activity implements OnTouchListe
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java b/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java
index 7f641f7ae..48a89b0f8 100644
--- a/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java
+++ b/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java
@@ -142,7 +142,7 @@ public class FdActivity extends Activity implements CvCameraViewListener2 {
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
index dee224a7e..382ce7503 100644
--- a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
+++ b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java
@@ -113,7 +113,7 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java b/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java
index 7d4de93b2..0ffb84c3a 100644
--- a/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java
+++ b/samples/android/native-activity/src/org/opencv/samples/NativeActivity/CvNativeActivity.java
@@ -41,7 +41,7 @@ public class CvNativeActivity extends Activity {
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java b/samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java
index 746eb4077..f0f2fb28c 100644
--- a/samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java
+++ b/samples/android/tutorial-1-camerapreview/src/org/opencv/samples/tutorial1/Tutorial1Activity.java
@@ -78,7 +78,7 @@ public class Tutorial1Activity extends Activity implements CvCameraViewListener2
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/tutorial-2-mixedprocessing/src/org/opencv/samples/tutorial2/Tutorial2Activity.java b/samples/android/tutorial-2-mixedprocessing/src/org/opencv/samples/tutorial2/Tutorial2Activity.java
index 1dbcff2ca..d8bb28a7e 100644
--- a/samples/android/tutorial-2-mixedprocessing/src/org/opencv/samples/tutorial2/Tutorial2Activity.java
+++ b/samples/android/tutorial-2-mixedprocessing/src/org/opencv/samples/tutorial2/Tutorial2Activity.java
@@ -99,7 +99,7 @@ public class Tutorial2Activity extends Activity implements CvCameraViewListener2
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3Activity.java b/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3Activity.java
index 8e3e16259..1800bfe1d 100644
--- a/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3Activity.java
+++ b/samples/android/tutorial-3-cameracontrol/src/org/opencv/samples/tutorial3/Tutorial3Activity.java
@@ -90,7 +90,7 @@ public class Tutorial3Activity extends Activity implements CvCameraViewListener2
         super.onResume();
         if (!OpenCVLoader.initDebug()) {
             Log.d(TAG, "Internal OpenCV library not found. Using OpenCV Manager for initialization");
-            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback);
+            OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_3_0_0, this, mLoaderCallback);
         } else {
             Log.d(TAG, "OpenCV library found inside package. Using it!");
             mLoaderCallback.onManagerConnected(LoaderCallbackInterface.SUCCESS);
diff --git a/samples/cpp/lkdemo.cpp b/samples/cpp/lkdemo.cpp
index 82c1fac2d..2f576c3ba 100644
--- a/samples/cpp/lkdemo.cpp
+++ b/samples/cpp/lkdemo.cpp
@@ -61,12 +61,11 @@ int main( int argc, char** argv )
     namedWindow( "LK Demo", 1 );
     setMouseCallback( "LK Demo", onMouse, 0 );
 
-    Mat gray, prevGray, image;
+    Mat gray, prevGray, image, frame;
     vector<Point2f> points[2];
 
     for(;;)
     {
-        Mat frame;
         cap >> frame;
         if( frame.empty() )
             break;
diff --git a/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp b/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp
index 34e2504c6..8059a4aec 100644
--- a/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp
+++ b/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp
@@ -34,7 +34,8 @@ public:
 
     void write(FileStorage& fs) const                        //Write serialization for this class
     {
-        fs << "{" << "BoardSize_Width"  << boardSize.width
+        fs << "{"
+                  << "BoardSize_Width"  << boardSize.width
                   << "BoardSize_Height" << boardSize.height
                   << "Square_Size"         << squareSize
                   << "Calibrate_Pattern" << patternToUse
@@ -43,8 +44,8 @@ public:
                   << "Calibrate_AssumeZeroTangentialDistortion" << calibZeroTangentDist
                   << "Calibrate_FixPrincipalPointAtTheCenter" << calibFixPrincipalPoint
 
-                  << "Write_DetectedFeaturePoints" << bwritePoints
-                  << "Write_extrinsicParameters"   << bwriteExtrinsics
+                  << "Write_DetectedFeaturePoints" << writePoints
+                  << "Write_extrinsicParameters"   << writeExtrinsics
                   << "Write_outputFileName"  << outputFileName
 
                   << "Show_UndistortedImage" << showUndistorsed
@@ -62,8 +63,8 @@ public:
         node["Square_Size"]  >> squareSize;
         node["Calibrate_NrOfFrameToUse"] >> nrFrames;
         node["Calibrate_FixAspectRatio"] >> aspectRatio;
-        node["Write_DetectedFeaturePoints"] >> bwritePoints;
-        node["Write_extrinsicParameters"] >> bwriteExtrinsics;
+        node["Write_DetectedFeaturePoints"] >> writePoints;
+        node["Write_extrinsicParameters"] >> writeExtrinsics;
         node["Write_outputFileName"] >> outputFileName;
         node["Calibrate_AssumeZeroTangentialDistortion"] >> calibZeroTangentDist;
         node["Calibrate_FixPrincipalPointAtTheCenter"] >> calibFixPrincipalPoint;
@@ -71,9 +72,9 @@ public:
         node["Show_UndistortedImage"] >> showUndistorsed;
         node["Input"] >> input;
         node["Input_Delay"] >> delay;
-        interprate();
+        validate();
     }
-    void interprate()
+    void validate()
     {
         goodInput = true;
         if (boardSize.width <= 0 || boardSize.height <= 0)
@@ -105,10 +106,10 @@ public:
             else
             {
                 if (readStringList(input, imageList))
-                    {
-                        inputType = IMAGE_LIST;
-                        nrFrames = (nrFrames < (int)imageList.size()) ? nrFrames : (int)imageList.size();
-                    }
+                {
+                    inputType = IMAGE_LIST;
+                    nrFrames = (nrFrames < (int)imageList.size()) ? nrFrames : (int)imageList.size();
+                }
                 else
                     inputType = VIDEO_FILE;
             }
@@ -121,7 +122,7 @@ public:
         }
         if (inputType == INVALID)
         {
-            cerr << " Inexistent input: " << input;
+            cerr << " Input does not exist: " << input;
             goodInput = false;
         }
 
@@ -136,10 +137,10 @@ public:
         if (!patternToUse.compare("CIRCLES_GRID")) calibrationPattern = CIRCLES_GRID;
         if (!patternToUse.compare("ASYMMETRIC_CIRCLES_GRID")) calibrationPattern = ASYMMETRIC_CIRCLES_GRID;
         if (calibrationPattern == NOT_EXISTING)
-            {
-                cerr << " Inexistent camera calibration mode: " << patternToUse << endl;
-                goodInput = false;
-            }
+        {
+            cerr << " Camera calibration mode does not exist: " << patternToUse << endl;
+            goodInput = false;
+        }
         atImageList = 0;
 
     }
@@ -152,7 +153,7 @@ public:
             inputCapture >> view0;
             view0.copyTo(result);
         }
-        else if( atImageList < (int)imageList.size() )
+        else if( atImageList < imageList.size() )
             result = imread(imageList[atImageList++], IMREAD_COLOR);
 
         return result;
@@ -173,26 +174,24 @@ public:
         return true;
     }
 public:
-    Size boardSize;            // The size of the board -> Number of items by width and height
-    Pattern calibrationPattern;// One of the Chessboard, circles, or asymmetric circle pattern
-    float squareSize;          // The size of a square in your defined unit (point, millimeter,etc).
-    int nrFrames;              // The number of frames to use from the input for calibration
-    float aspectRatio;         // The aspect ratio
-    int delay;                 // In case of a video input
-    bool bwritePoints;         //  Write detected feature points
-    bool bwriteExtrinsics;     // Write extrinsic parameters
-    bool calibZeroTangentDist; // Assume zero tangential distortion
-    bool calibFixPrincipalPoint;// Fix the principal point at the center
-    bool flipVertical;          // Flip the captured images around the horizontal axis
-    string outputFileName;      // The name of the file where to write
-    bool showUndistorsed;       // Show undistorted images after calibration
-    string input;               // The input ->
-
-
+    Size boardSize;              // The size of the board -> Number of items by width and height
+    Pattern calibrationPattern;  // One of the Chessboard, circles, or asymmetric circle pattern
+    float squareSize;            // The size of a square in your defined unit (point, millimeter, etc.).
+    int nrFrames;                // The number of frames to use from the input for calibration
+    float aspectRatio;           // The aspect ratio
+    int delay;                   // In case of a video input
+    bool writePoints;            // Write detected feature points
+    bool writeExtrinsics;        // Write extrinsic parameters
+    bool calibZeroTangentDist;   // Assume zero tangential distortion
+    bool calibFixPrincipalPoint; // Fix the principal point at the center
+    bool flipVertical;           // Flip the captured images around the horizontal axis
+    string outputFileName;       // The name of the file where to write
+    bool showUndistorsed;        // Show undistorted images after calibration
+    string input;                // The input ->
 
     int cameraID;
     vector<string> imageList;
-    int atImageList;
+    size_t atImageList;
     VideoCapture inputCapture;
     InputType inputType;
     bool goodInput;
@@ -204,7 +203,7 @@ private:
 
 };
 
-static void read(const FileNode& node, Settings& x, const Settings& default_value = Settings())
+static inline void read(const FileNode& node, Settings& x, const Settings& default_value = Settings())
 {
     if(node.empty())
         x = default_value;
@@ -212,6 +211,11 @@ static void read(const FileNode& node, Settings& x, const Settings& default_valu
         x.read(node);
 }
 
+static inline void write(FileStorage& fs, const String&, const Settings& s )
+{
+    s.write(fs);
+}
+
 enum { DETECTION = 0, CAPTURING = 1, CALIBRATED = 2 };
 
 bool runCalibrationAndSave(Settings& s, Size imageSize, Mat&  cameraMatrix, Mat& distCoeffs,
@@ -220,6 +224,8 @@ bool runCalibrationAndSave(Settings& s, Size imageSize, Mat&  cameraMatrix, Mat&
 int main(int argc, char* argv[])
 {
     help();
+
+    //! [file_read]
     Settings s;
     const string inputSettingsFile = argc > 1 ? argv[1] : "default.xml";
     FileStorage fs(inputSettingsFile, FileStorage::READ); // Read the settings
@@ -230,6 +236,10 @@ int main(int argc, char* argv[])
     }
     fs["Settings"] >> s;
     fs.release();                                         // close Settings file
+    //! [file_read]
+
+    //FileStorage fout("settings.yml", FileStorage::WRITE); // write config as YAML
+    //fout << "Settings" << s;
 
     if (!s.goodInput)
     {
@@ -245,32 +255,35 @@ int main(int argc, char* argv[])
     const Scalar RED(0,0,255), GREEN(0,255,0);
     const char ESC_KEY = 27;
 
-    for(int i = 0;;++i)
+    //! [get_input]
+    for(;;)
     {
-      Mat view;
-      bool blinkOutput = false;
+        Mat view;
+        bool blinkOutput = false;
 
-      view = s.nextImage();
+        view = s.nextImage();
 
-      //-----  If no more image, or got enough, then stop calibration and show result -------------
-      if( mode == CAPTURING && imagePoints.size() >= (unsigned)s.nrFrames )
-      {
+        //-----  If no more images, or got enough, then stop calibration and show result ------------
+        if( mode == CAPTURING && imagePoints.size() >= (size_t)s.nrFrames )
+        {
           if( runCalibrationAndSave(s, imageSize,  cameraMatrix, distCoeffs, imagePoints))
               mode = CALIBRATED;
           else
               mode = DETECTION;
-      }
-      if(view.empty())          // If no more images then run calibration, save and stop loop.
-      {
-            if( imagePoints.size() > 0 )
+        }
+        if(view.empty())          // If there are no more images stop the loop
+        {
+            // if calibration threshold was not reached yet, calibrate now
+            if( mode != CALIBRATED && !imagePoints.empty() )
                 runCalibrationAndSave(s, imageSize,  cameraMatrix, distCoeffs, imagePoints);
             break;
-      }
-
+        }
+        //! [get_input]
 
         imageSize = view.size();  // Format input image.
         if( s.flipVertical )    flip( view, view, 0 );
 
+        //! [find_pattern]
         vector<Point2f> pointBuf;
 
         bool found;
@@ -290,7 +303,8 @@ int main(int argc, char* argv[])
             found = false;
             break;
         }
-
+        //! [find_pattern]
+        //! [pattern_found]
         if ( found)                // If done with success,
         {
               // improve the found corners' coordinate accuracy for chessboard
@@ -313,8 +327,9 @@ int main(int argc, char* argv[])
                 // Draw the corners.
                 drawChessboardCorners( view, s.boardSize, Mat(pointBuf), found );
         }
-
+        //! [pattern_found]
         //----------------------------- Output Text ------------------------------------------------
+        //! [output_text]
         string msg = (mode == CAPTURING) ? "100/100" :
                       mode == CALIBRATED ? "Calibrated" : "Press 'g' to start";
         int baseLine = 0;
@@ -333,15 +348,17 @@ int main(int argc, char* argv[])
 
         if( blinkOutput )
             bitwise_not(view, view);
-
+        //! [output_text]
         //------------------------- Video capture  output  undistorted ------------------------------
+        //! [output_undistorted]
         if( mode == CALIBRATED && s.showUndistorsed )
         {
             Mat temp = view.clone();
             undistort(temp, view, cameraMatrix, distCoeffs);
         }
-
+        //! [output_undistorted]
         //------------------------------ Show image and check for input commands -------------------
+        //! [await_input]
         imshow("Image View", view);
         char key = (char)waitKey(s.inputCapture.isOpened() ? 50 : s.delay);
 
@@ -356,9 +373,11 @@ int main(int argc, char* argv[])
             mode = CAPTURING;
             imagePoints.clear();
         }
+        //! [await_input]
     }
 
     // -----------------------Show the undistorted image for the image list ------------------------
+    //! [show_results]
     if( s.inputType == Settings::IMAGE_LIST && s.showUndistorsed )
     {
         Mat view, rview, map1, map2;
@@ -366,7 +385,7 @@ int main(int argc, char* argv[])
             getOptimalNewCameraMatrix(cameraMatrix, distCoeffs, imageSize, 1, imageSize, 0),
             imageSize, CV_16SC2, map1, map2);
 
-        for(int i = 0; i < (int)s.imageList.size(); i++ )
+        for(size_t i = 0; i < s.imageList.size(); i++ )
         {
             view = imread(s.imageList[i], 1);
             if(view.empty())
@@ -378,11 +397,12 @@ int main(int argc, char* argv[])
                 break;
         }
     }
-
+    //! [show_results]
 
     return 0;
 }
 
+//! [compute_errors]
 static double computeReprojectionErrors( const vector<vector<Point3f> >& objectPoints,
                                          const vector<vector<Point2f> >& imagePoints,
                                          const vector<Mat>& rvecs, const vector<Mat>& tvecs,
@@ -390,17 +410,16 @@ static double computeReprojectionErrors( const vector<vector<Point3f> >& objectP
                                          vector<float>& perViewErrors)
 {
     vector<Point2f> imagePoints2;
-    int i, totalPoints = 0;
+    size_t totalPoints = 0;
     double totalErr = 0, err;
     perViewErrors.resize(objectPoints.size());
 
-    for( i = 0; i < (int)objectPoints.size(); ++i )
+    for(size_t i = 0; i < objectPoints.size(); ++i )
     {
-        projectPoints( Mat(objectPoints[i]), rvecs[i], tvecs[i], cameraMatrix,
-                       distCoeffs, imagePoints2);
-        err = norm(Mat(imagePoints[i]), Mat(imagePoints2), NORM_L2);
+        projectPoints(objectPoints[i], rvecs[i], tvecs[i], cameraMatrix, distCoeffs, imagePoints2);
+        err = norm(imagePoints[i], imagePoints2, NORM_L2);
 
-        int n = (int)objectPoints[i].size();
+        size_t n = objectPoints[i].size();
         perViewErrors[i] = (float) std::sqrt(err*err/n);
         totalErr        += err*err;
         totalPoints     += n;
@@ -408,7 +427,8 @@ static double computeReprojectionErrors( const vector<vector<Point3f> >& objectP
 
     return std::sqrt(totalErr/totalPoints);
 }
-
+//! [compute_errors]
+//! [board_corners]
 static void calcBoardCornerPositions(Size boardSize, float squareSize, vector<Point3f>& corners,
                                      Settings::Pattern patternType /*= Settings::CHESSBOARD*/)
 {
@@ -420,28 +440,28 @@ static void calcBoardCornerPositions(Size boardSize, float squareSize, vector<Po
     case Settings::CIRCLES_GRID:
         for( int i = 0; i < boardSize.height; ++i )
             for( int j = 0; j < boardSize.width; ++j )
-                corners.push_back(Point3f(float( j*squareSize ), float( i*squareSize ), 0));
+                corners.push_back(Point3f(j*squareSize, i*squareSize, 0));
         break;
 
     case Settings::ASYMMETRIC_CIRCLES_GRID:
         for( int i = 0; i < boardSize.height; i++ )
             for( int j = 0; j < boardSize.width; j++ )
-                corners.push_back(Point3f(float((2*j + i % 2)*squareSize), float(i*squareSize), 0));
+                corners.push_back(Point3f((2*j + i % 2)*squareSize, i*squareSize, 0));
         break;
     default:
         break;
     }
 }
-
+//! [board_corners]
 static bool runCalibration( Settings& s, Size& imageSize, Mat& cameraMatrix, Mat& distCoeffs,
                             vector<vector<Point2f> > imagePoints, vector<Mat>& rvecs, vector<Mat>& tvecs,
                             vector<float>& reprojErrs,  double& totalAvgErr)
 {
-
+    //! [fixed_aspect]
     cameraMatrix = Mat::eye(3, 3, CV_64F);
     if( s.flag & CALIB_FIX_ASPECT_RATIO )
-        cameraMatrix.at<double>(0,0) = 1.0;
-
+        cameraMatrix.at<double>(0,0) = s.aspectRatio;
+    //! [fixed_aspect]
     distCoeffs = Mat::zeros(8, 1, CV_64F);
 
     vector<vector<Point3f> > objectPoints(1);
@@ -475,49 +495,48 @@ static void saveCameraParams( Settings& s, Size& imageSize, Mat& cameraMatrix, M
     time( &tm );
     struct tm *t2 = localtime( &tm );
     char buf[1024];
-    strftime( buf, sizeof(buf)-1, "%c", t2 );
+    strftime( buf, sizeof(buf), "%c", t2 );
 
-    fs << "calibration_Time" << buf;
+    fs << "calibration_time" << buf;
 
     if( !rvecs.empty() || !reprojErrs.empty() )
-        fs << "nrOfFrames" << (int)std::max(rvecs.size(), reprojErrs.size());
-    fs << "image_Width" << imageSize.width;
-    fs << "image_Height" << imageSize.height;
-    fs << "board_Width" << s.boardSize.width;
-    fs << "board_Height" << s.boardSize.height;
-    fs << "square_Size" << s.squareSize;
+        fs << "nr_of_frames" << (int)std::max(rvecs.size(), reprojErrs.size());
+    fs << "image_width" << imageSize.width;
+    fs << "image_height" << imageSize.height;
+    fs << "board_width" << s.boardSize.width;
+    fs << "board_height" << s.boardSize.height;
+    fs << "square_size" << s.squareSize;
 
     if( s.flag & CALIB_FIX_ASPECT_RATIO )
-        fs << "FixAspectRatio" << s.aspectRatio;
+        fs << "fix_aspect_ratio" << s.aspectRatio;
 
-    if( s.flag )
+    if (s.flag)
     {
-        sprintf( buf, "flags: %s%s%s%s",
-            s.flag & CALIB_USE_INTRINSIC_GUESS ? " +use_intrinsic_guess" : "",
-            s.flag & CALIB_FIX_ASPECT_RATIO ? " +fix_aspectRatio" : "",
-            s.flag & CALIB_FIX_PRINCIPAL_POINT ? " +fix_principal_point" : "",
-            s.flag & CALIB_ZERO_TANGENT_DIST ? " +zero_tangent_dist" : "" );
-        //cvWriteComment( *fs, buf, 0 );
-
+        sprintf(buf, "flags: %s%s%s%s",
+                s.flag & CALIB_USE_INTRINSIC_GUESS ? " +use_intrinsic_guess" : "",
+                s.flag & CALIB_FIX_ASPECT_RATIO ? " +fix_aspect_ratio" : "",
+                s.flag & CALIB_FIX_PRINCIPAL_POINT ? " +fix_principal_point" : "",
+                s.flag & CALIB_ZERO_TANGENT_DIST ? " +zero_tangent_dist" : "");
+        cvWriteComment(*fs, buf, 0);
     }
 
-    fs << "flagValue" << s.flag;
+    fs << "flags" << s.flag;
 
-    fs << "Camera_Matrix" << cameraMatrix;
-    fs << "Distortion_Coefficients" << distCoeffs;
+    fs << "camera_matrix" << cameraMatrix;
+    fs << "distortion_coefficients" << distCoeffs;
 
-    fs << "Avg_Reprojection_Error" << totalAvgErr;
-    if( !reprojErrs.empty() )
-        fs << "Per_View_Reprojection_Errors" << Mat(reprojErrs);
+    fs << "avg_reprojection_error" << totalAvgErr;
+    if (s.writeExtrinsics && !reprojErrs.empty())
+        fs << "per_view_reprojection_errors" << Mat(reprojErrs);
 
-    if( !rvecs.empty() && !tvecs.empty() )
+    if(s.writeExtrinsics && !rvecs.empty() && !tvecs.empty() )
     {
         CV_Assert(rvecs[0].type() == tvecs[0].type());
         Mat bigmat((int)rvecs.size(), 6, rvecs[0].type());
-        for( int i = 0; i < (int)rvecs.size(); i++ )
+        for( size_t i = 0; i < rvecs.size(); i++ )
         {
-            Mat r = bigmat(Range(i, i+1), Range(0,3));
-            Mat t = bigmat(Range(i, i+1), Range(3,6));
+            Mat r = bigmat(Range(int(i), int(i+1)), Range(0,3));
+            Mat t = bigmat(Range(int(i), int(i+1)), Range(3,6));
 
             CV_Assert(rvecs[i].rows == 3 && rvecs[i].cols == 1);
             CV_Assert(tvecs[i].rows == 3 && tvecs[i].cols == 1);
@@ -526,35 +545,38 @@ static void saveCameraParams( Settings& s, Size& imageSize, Mat& cameraMatrix, M
             t = tvecs[i].t();
         }
         //cvWriteComment( *fs, "a set of 6-tuples (rotation vector + translation vector) for each view", 0 );
-        fs << "Extrinsic_Parameters" << bigmat;
+        fs << "extrinsic_parameters" << bigmat;
     }
 
-    if( !imagePoints.empty() )
+    if(s.writePoints && !imagePoints.empty() )
     {
         Mat imagePtMat((int)imagePoints.size(), (int)imagePoints[0].size(), CV_32FC2);
-        for( int i = 0; i < (int)imagePoints.size(); i++ )
+        for( size_t i = 0; i < imagePoints.size(); i++ )
         {
-            Mat r = imagePtMat.row(i).reshape(2, imagePtMat.cols);
+            Mat r = imagePtMat.row(int(i)).reshape(2, imagePtMat.cols);
             Mat imgpti(imagePoints[i]);
             imgpti.copyTo(r);
         }
-        fs << "Image_points" << imagePtMat;
+        fs << "image_points" << imagePtMat;
     }
 }
 
-bool runCalibrationAndSave(Settings& s, Size imageSize, Mat&  cameraMatrix, Mat& distCoeffs,vector<vector<Point2f> > imagePoints )
+//! [run_and_save]
+bool runCalibrationAndSave(Settings& s, Size imageSize, Mat& cameraMatrix, Mat& distCoeffs,
+                           vector<vector<Point2f> > imagePoints)
 {
     vector<Mat> rvecs, tvecs;
     vector<float> reprojErrs;
     double totalAvgErr = 0;
 
-    bool ok = runCalibration(s,imageSize, cameraMatrix, distCoeffs, imagePoints, rvecs, tvecs,
-                             reprojErrs, totalAvgErr);
+    bool ok = runCalibration(s, imageSize, cameraMatrix, distCoeffs, imagePoints, rvecs, tvecs, reprojErrs,
+                             totalAvgErr);
     cout << (ok ? "Calibration succeeded" : "Calibration failed")
-        << ". avg re projection error = "  << totalAvgErr ;
+         << ". avg reprojection error = " << totalAvgErr << endl;
 
-    if( ok )
-        saveCameraParams( s, imageSize, cameraMatrix, distCoeffs, rvecs ,tvecs, reprojErrs,
-                            imagePoints, totalAvgErr);
+    if (ok)
+        saveCameraParams(s, imageSize, cameraMatrix, distCoeffs, rvecs, tvecs, reprojErrs, imagePoints,
+                         totalAvgErr);
     return ok;
 }
+//! [run_and_save]
diff --git a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp
index 438b2c6d1..56193c1cb 100644
--- a/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp
+++ b/samples/winrt/ImageManipulations/MediaExtensions/OcvTransform/OcvTransform.cpp
@@ -120,13 +120,29 @@ HRESULT OcvImageManipulations::SetProperties(ABI::Windows::Foundation::Collectio
 
     if (found)
     {
-        IInspectable* value;
-        spSetting->Lookup(key, &value);
+        Microsoft::WRL::ComPtr<ABI::Windows::Foundation::IPropertyValue> spPropVal;
+        Microsoft::WRL::ComPtr<IInspectable> spInsp;
+
+        // Lookup can fail and leave spInsp null; bail out before As() dereferences it.
+        hr = spSetting->Lookup(key, spInsp.ReleaseAndGetAddressOf());
+        if (hr != S_OK)
+        {
+            return hr;
+        }
+
+        hr = spInsp.As(&spPropVal);
+        if (hr != S_OK)
+        {
+            return hr;
+        }
+
+        INT32 effect;
+        hr = spPropVal->GetInt32(&effect);
+        if (hr != S_OK)
+        {
+            return hr;
+        }
 
-        Microsoft::WRL::ComPtr<ABI::Windows::Foundation::IReference<int>> ref;
-        hr = value->QueryInterface(IID_PPV_ARGS(&ref));
-        int effect = InvalidEffect;
-        hr = ref->get_Value(&effect);
         if ((effect >= 0) && (effect < InvalidEffect))
         {
             m_TransformType = (ProcessingType)effect;
diff --git a/samples/winrt/JavaScript/MediaCaptureJavaScript.jsproj b/samples/winrt/JavaScript/MediaCaptureJavaScript.jsproj
new file mode 100644
index 000000000..8bf1b03d9
--- /dev/null
+++ b/samples/winrt/JavaScript/MediaCaptureJavaScript.jsproj
@@ -0,0 +1,115 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|AnyCPU">
+      <Configuration>Debug</Configuration>
+      <Platform>AnyCPU</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x86">
+      <Configuration>Debug</Configuration>
+      <Platform>x86</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|AnyCPU">
+      <Configuration>Release</Configuration>
+      <Platform>AnyCPU</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM">
+      <Configuration>Release</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x86">
+      <Configuration>Release</Configuration>
+      <Platform>x86</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>d70a3790-48ce-4e58-af60-ebefc22e9c7a</ProjectGuid>
+  </PropertyGroup>
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup Condition="'$(VisualStudioVersion)' == '' or '$(VisualStudioVersion)' &lt; '12.0'">
+    <VisualStudioVersion>12.0</VisualStudioVersion>
+  </PropertyGroup>
+  <Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\$(WMSJSProjectDirectory)\Microsoft.VisualStudio.$(WMSJSProject).Default.props" />
+  <Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\$(WMSJSProjectDirectory)\Microsoft.VisualStudio.$(WMSJSProject).props" />
+  <PropertyGroup>
+    <TargetPlatformIdentifier>Windows</TargetPlatformIdentifier>
+    <TargetPlatformVersion>8.1</TargetPlatformVersion>
+    <RequiredPlatformVersion>8.1</RequiredPlatformVersion>
+    <MinimumVisualStudioVersion>$(VersionNumberMajor).$(VersionNumberMinor)</MinimumVisualStudioVersion>
+    <DefaultLanguage>en-US</DefaultLanguage>
+    <PackageCertificateKeyFile>MediaCaptureJavaScript_TemporaryKey.pfx</PackageCertificateKeyFile>
+  </PropertyGroup>
+  <ItemGroup>
+    <AppxManifest Include="package.appxmanifest">
+      <SubType>Designer</SubType>
+    </AppxManifest>
+    <Content Include="default.html" />
+    <Content Include="html\AdvancedCapture.html" />
+    <Content Include="images\logo.scale-100.png" />
+    <Content Include="images\microsoft-sdk.png" />
+    <Content Include="images\smalllogo.scale-100.png" />
+    <Content Include="images\smallTile-sdk.png" />
+    <Content Include="images\splash-sdk.png" />
+    <Content Include="images\splashscreen.scale-100.png" />
+    <Content Include="images\squareTile-sdk.png" />
+    <Content Include="images\storeLogo-sdk.png" />
+    <Content Include="images\storelogo.scale-100.png" />
+    <Content Include="images\tile-sdk.png" />
+    <Content Include="images\windows-sdk.png" />
+    <Content Include="js\AdvancedCapture.js" />
+    <Content Include="js\default.js" />
+    <Content Include="css\default.css" />
+    <Content Include="sample-utils\sample-utils.css" />
+    <Content Include="sample-utils\sample-utils.js" />
+    <Content Include="sample-utils\scenario-select.html" />
+    <None Include="MediaCaptureJavaScript_TemporaryKey.pfx" />
+  </ItemGroup>
+  <ItemGroup>
+    <SDKReference Include="Microsoft.WinJS.2.0, Version=1.0" />
+  </ItemGroup>
+  <PropertyGroup Label="UserMacros">
+    <OpenCV_Bin>$(OPENCV_WINRT_INSTALL_DIR)WS\8.1\$(PlatformTarget)\$(PlatformTarget)\vc12\bin\</OpenCV_Bin>
+    <OpenCV_Lib>$(OPENCV_WINRT_INSTALL_DIR)WS\8.1\$(PlatformTarget)\$(PlatformTarget)\vc12\lib\</OpenCV_Lib>
+    <OpenCV_Include>$(OPENCV_WINRT_INSTALL_DIR)WS\8.1\$(PlatformTarget)\include\</OpenCV_Include>
+    <!--debug suffix for OpenCV dlls and libs -->
+    <DebugSuffix Condition="'$(Configuration)'=='Debug'">d</DebugSuffix>
+    <DebugSuffix Condition="'$(Configuration)'!='Debug'">
+    </DebugSuffix>
+  </PropertyGroup>
+  <ItemGroup>
+    <!--Add required OpenCV dlls here-->
+    <Content Include="$(OpenCV_Bin)opencv_core300$(DebugSuffix).dll">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+    <Content Include="$(OpenCV_Bin)opencv_imgproc300$(DebugSuffix).dll">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\ImageManipulations\MediaExtensions\OcvTransform\OcvTransform.vcxproj" />
+  </ItemGroup>
+  <Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\$(WMSJSProjectDirectory)\Microsoft.VisualStudio.$(WMSJSProject).targets" />
+  <!-- To modify your build process, add your task inside one of the targets below then uncomment
+       that target and the DisableFastUpToDateCheck PropertyGroup.
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  <PropertyGroup>
+    <DisableFastUpToDateCheck>true</DisableFastUpToDateCheck>
+  </PropertyGroup>
+  -->
+</Project>
\ No newline at end of file
diff --git a/samples/winrt/JavaScript/MediaCaptureJavaScript.sln b/samples/winrt/JavaScript/MediaCaptureJavaScript.sln
new file mode 100644
index 000000000..cb5c347fa
--- /dev/null
+++ b/samples/winrt/JavaScript/MediaCaptureJavaScript.sln
@@ -0,0 +1,84 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.31101.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OcvTransform", "..\ImageManipulations\MediaExtensions\OcvTransform\OcvTransform.vcxproj", "{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}"
+EndProject
+Project("{262852C6-CD72-467D-83FE-5EEB1973A190}") = "MediaCaptureJavaScript", "MediaCaptureJavaScript.jsproj", "{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Debug|ARM = Debug|ARM
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Any CPU = Release|Any CPU
+		Release|ARM = Release|ARM
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Any CPU.ActiveCfg = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|ARM.ActiveCfg = Debug|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|ARM.Build.0 = Debug|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|ARM.Deploy.0 = Debug|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Win32.Build.0 = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|Win32.Deploy.0 = Debug|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|x64.ActiveCfg = Debug|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|x64.Build.0 = Debug|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Debug|x64.Deploy.0 = Debug|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Any CPU.ActiveCfg = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|ARM.ActiveCfg = Release|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|ARM.Build.0 = Release|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|ARM.Deploy.0 = Release|ARM
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Win32.ActiveCfg = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Win32.Build.0 = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|Win32.Deploy.0 = Release|Win32
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|x64.ActiveCfg = Release|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|x64.Build.0 = Release|x64
+		{C5B886A7-8300-46FF-B533-9613DE2AF637}.Release|x64.Deploy.0 = Release|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|Any CPU.ActiveCfg = Debug|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|ARM.ActiveCfg = Debug|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|ARM.Build.0 = Debug|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|Win32.ActiveCfg = Debug|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|Win32.Build.0 = Debug|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|x64.ActiveCfg = Debug|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Debug|x64.Build.0 = Debug|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|Any CPU.ActiveCfg = Release|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|ARM.ActiveCfg = Release|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|ARM.Build.0 = Release|ARM
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|Win32.ActiveCfg = Release|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|Win32.Build.0 = Release|Win32
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|x64.ActiveCfg = Release|x64
+		{BA69218F-DA5C-4D14-A78D-21A9E4DEC669}.Release|x64.Build.0 = Release|x64
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|Any CPU.Deploy.0 = Debug|Any CPU
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|ARM.ActiveCfg = Debug|ARM
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|ARM.Build.0 = Debug|ARM
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|ARM.Deploy.0 = Debug|ARM
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|Win32.ActiveCfg = Debug|x86
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|Win32.Build.0 = Debug|x86
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|Win32.Deploy.0 = Debug|x86
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|x64.ActiveCfg = Debug|x64
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|x64.Build.0 = Debug|x64
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Debug|x64.Deploy.0 = Debug|x64
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|Any CPU.Deploy.0 = Release|Any CPU
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|ARM.ActiveCfg = Release|ARM
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|ARM.Build.0 = Release|ARM
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|ARM.Deploy.0 = Release|ARM
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|Win32.ActiveCfg = Release|x86
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|Win32.Build.0 = Release|x86
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|Win32.Deploy.0 = Release|x86
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|x64.ActiveCfg = Release|x64
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|x64.Build.0 = Release|x64
+		{D70A3790-48CE-4E58-AF60-EBEFC22E9C7A}.Release|x64.Deploy.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/samples/winrt/JavaScript/MediaCaptureJavaScript_TemporaryKey.pfx b/samples/winrt/JavaScript/MediaCaptureJavaScript_TemporaryKey.pfx
new file mode 100644
index 000000000..242376d0e
Binary files /dev/null and b/samples/winrt/JavaScript/MediaCaptureJavaScript_TemporaryKey.pfx differ
diff --git a/samples/winrt/JavaScript/css/default.css b/samples/winrt/JavaScript/css/default.css
new file mode 100644
index 000000000..e47ef4028
--- /dev/null
+++ b/samples/winrt/JavaScript/css/default.css
@@ -0,0 +1,6 @@
+﻿/* styles */
+
+#featureLabel
+{
+    padding-top: 50px;
+}
\ No newline at end of file
diff --git a/samples/winrt/JavaScript/default.html b/samples/winrt/JavaScript/default.html
new file mode 100644
index 000000000..b229ddaf4
--- /dev/null
+++ b/samples/winrt/JavaScript/default.html
@@ -0,0 +1,31 @@
+﻿<!-- Copyright (c) Microsoft Corporation. All rights reserved. -->
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8" />
+    <title>OpenCV for Windows RT</title>
+
+    <!-- WinJS references -->
+    <link rel="stylesheet" href="//Microsoft.WinJS.2.0/css/ui-light.css" />
+    <script src="//Microsoft.WinJS.2.0/js/base.js"></script>
+    <script src="//Microsoft.WinJS.2.0/js/ui.js"></script>
+
+    <!-- SDK sample framework references -->
+    <link rel="stylesheet" href="/sample-utils/sample-utils.css" />
+    <link rel="stylesheet" href="/css/default.css" />
+    <script src="/sample-utils/sample-utils.js"></script>
+    <script src="/js/default.js"></script>
+</head>
+<body role="application">
+    <div id="rootGrid">
+        <div id="content">
+            <h4>
+                <img src="images/windows-sdk.png"  />
+                <span>OpenCV for Windows RT</span>
+            </h4>
+            <h1 id="featureLabel"></h1>
+            <div id="contentHost"></div>
+        </div>
+    </div>
+</body>
+</html>
diff --git a/samples/winrt/JavaScript/html/AdvancedCapture.html b/samples/winrt/JavaScript/html/AdvancedCapture.html
new file mode 100644
index 000000000..452399296
--- /dev/null
+++ b/samples/winrt/JavaScript/html/AdvancedCapture.html
@@ -0,0 +1,40 @@
+﻿<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+    <title></title>
+    <link rel="stylesheet" href="/css/default.css" />
+    <script src="/js/AdvancedCapture.js"></script>
+</head>
+<body>
+    <div data-win-control="SdkSample.ScenarioInput">
+        <p>
+            This scenario shows how to enumerate cameras in the system.
+            Choose a camera from the list to start previewing from that camera. You can add an additional effect
+            using the dropdown provided.
+        </p>
+        <select id="cameraSelect" aria-labelledby="listLabel"></select>
+        <button id="btnStartDevice" disabled="disabled">Start Device</button>
+        <button id="btnStartPreview" disabled="disabled">Start Preview</button>
+        <select id="videoEffect" disabled="disabled">
+            <option>Preview</option>
+            <option>Grayscale</option>
+            <option>Canny</option>
+            <option>Sobel</option>
+            <option>Histogram</option>
+        </select>
+    </div>
+    <div data-win-control="SdkSample.ScenarioOutput">
+        <table>
+            <tr>
+                <td>Preview</td>
+            </tr>
+            <tr>
+                <td>
+                    <video width="320" height="240" id="previewVideo" style="border: 1px solid black">
+                    </video>
+                </td>
+            </tr>
+        </table>
+    </div>
+</body>
+</html>
diff --git a/samples/winrt/JavaScript/images/logo.scale-100.png b/samples/winrt/JavaScript/images/logo.scale-100.png
new file mode 100644
index 000000000..ea685d651
Binary files /dev/null and b/samples/winrt/JavaScript/images/logo.scale-100.png differ
diff --git a/samples/winrt/JavaScript/images/microsoft-sdk.png b/samples/winrt/JavaScript/images/microsoft-sdk.png
new file mode 100644
index 000000000..380a01026
Binary files /dev/null and b/samples/winrt/JavaScript/images/microsoft-sdk.png differ
diff --git a/samples/winrt/JavaScript/images/smallTile-sdk.png b/samples/winrt/JavaScript/images/smallTile-sdk.png
new file mode 100644
index 000000000..5546e8b24
Binary files /dev/null and b/samples/winrt/JavaScript/images/smallTile-sdk.png differ
diff --git a/samples/winrt/JavaScript/images/smalllogo.scale-100.png b/samples/winrt/JavaScript/images/smalllogo.scale-100.png
new file mode 100644
index 000000000..efaf5468a
Binary files /dev/null and b/samples/winrt/JavaScript/images/smalllogo.scale-100.png differ
diff --git a/samples/winrt/JavaScript/images/splash-sdk.png b/samples/winrt/JavaScript/images/splash-sdk.png
new file mode 100644
index 000000000..901c3b085
Binary files /dev/null and b/samples/winrt/JavaScript/images/splash-sdk.png differ
diff --git a/samples/winrt/JavaScript/images/splashscreen.scale-100.png b/samples/winrt/JavaScript/images/splashscreen.scale-100.png
new file mode 100644
index 000000000..c951e031b
Binary files /dev/null and b/samples/winrt/JavaScript/images/splashscreen.scale-100.png differ
diff --git a/samples/winrt/JavaScript/images/squareTile-sdk.png b/samples/winrt/JavaScript/images/squareTile-sdk.png
new file mode 100644
index 000000000..126cf70d8
Binary files /dev/null and b/samples/winrt/JavaScript/images/squareTile-sdk.png differ
diff --git a/samples/winrt/JavaScript/images/storeLogo-sdk.png b/samples/winrt/JavaScript/images/storeLogo-sdk.png
new file mode 100644
index 000000000..2133f177d
Binary files /dev/null and b/samples/winrt/JavaScript/images/storeLogo-sdk.png differ
diff --git a/samples/winrt/JavaScript/images/storelogo.scale-100.png b/samples/winrt/JavaScript/images/storelogo.scale-100.png
new file mode 100644
index 000000000..dcb672712
Binary files /dev/null and b/samples/winrt/JavaScript/images/storelogo.scale-100.png differ
diff --git a/samples/winrt/JavaScript/images/tile-sdk.png b/samples/winrt/JavaScript/images/tile-sdk.png
new file mode 100644
index 000000000..cdec0dbdc
Binary files /dev/null and b/samples/winrt/JavaScript/images/tile-sdk.png differ
diff --git a/samples/winrt/JavaScript/images/windows-sdk.png b/samples/winrt/JavaScript/images/windows-sdk.png
new file mode 100644
index 000000000..af64bf00a
Binary files /dev/null and b/samples/winrt/JavaScript/images/windows-sdk.png differ
diff --git a/samples/winrt/JavaScript/js/AdvancedCapture.js b/samples/winrt/JavaScript/js/AdvancedCapture.js
new file mode 100644
index 000000000..211e1cd3b
--- /dev/null
+++ b/samples/winrt/JavaScript/js/AdvancedCapture.js
@@ -0,0 +1,161 @@
+﻿//// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+//// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+//// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+//// PARTICULAR PURPOSE.
+////
+//// Copyright (c) Microsoft Corporation. All rights reserved
+
+(function () {
+    "use strict";
+
+    var cameraList = null;
+    var mediaCaptureMgr = null;
+    var captureInitSettings = null;
+
+    var page = WinJS.UI.Pages.define("/html/AdvancedCapture.html", {
+
+        ready: function (element, options) {
+            scenarioInitialize();
+        },
+
+        unload: function (element, options) {
+            // release resources
+            releaseMediaCapture();
+        }
+    });
+
+    function scenarioInitialize() {
+        // Initialize the UI elements
+        id("btnStartDevice").disabled = false;
+        id("btnStartDevice").addEventListener("click", startDevice, false);
+        id("btnStartPreview").disabled = true;
+        id("videoEffect").disabled = true;
+        id("btnStartPreview").addEventListener("click", startPreview, false);
+        id("cameraSelect").addEventListener("change", onDeviceChange, false);
+
+        id("videoEffect").addEventListener('change', addEffectToImageStream, false);
+
+        enumerateCameras();
+    }
+
+    function initCameraSettings() {
+        captureInitSettings = new Windows.Media.Capture.MediaCaptureInitializationSettings();
+        captureInitSettings.streamingCaptureMode = Windows.Media.Capture.StreamingCaptureMode.video
+
+        // If the user chose another capture device, use it by default
+        var selectedIndex = id("cameraSelect").selectedIndex;
+        var deviceInfo = cameraList[selectedIndex];
+        captureInitSettings.videoDeviceId = deviceInfo.id;
+    }
+
+    // this function takes care of releasing the resources associated with media capturing
+    function releaseMediaCapture() {
+        if (mediaCaptureMgr) {
+            mediaCaptureMgr.close();
+            mediaCaptureMgr = null;
+        }
+    }
+
+    //Initialize media capture with the current settings
+    function startDevice() {
+        displayStatus("Starting device");
+        releaseMediaCapture();
+        initCameraSettings();
+
+        mediaCaptureMgr = new Windows.Media.Capture.MediaCapture();
+        mediaCaptureMgr.initializeAsync(captureInitSettings).done(function (result) {
+            // Update the UI
+            id("btnStartPreview").disabled = false;
+            id("btnStartDevice").disabled = true;
+            displayStatus("Device started");
+        });
+    }
+
+    function startPreview() {
+        displayStatus("Starting preview");
+        id("btnStartPreview").disabled = true;
+        id("videoEffect").disabled = false;
+        var video = id("previewVideo");
+        video.src = URL.createObjectURL(mediaCaptureMgr, { oneTimeOnly: true });
+        video.play();
+        displayStatus("Preview started");
+    }
+
+    function addEffectToImageStream() {
+        var effectId = id("videoEffect").selectedIndex;
+        var props = new Windows.Foundation.Collections.PropertySet();
+        props.insert("{698649BE-8EAE-4551-A4CB-3EC98FBD3D86}", effectId);
+
+        mediaCaptureMgr.clearEffectsAsync(Windows.Media.Capture.MediaStreamType.videoPreview).then(function () {
+            return mediaCaptureMgr.addEffectAsync(Windows.Media.Capture.MediaStreamType.videoPreview, 'OcvTransform.OcvImageManipulations', props);
+        }).then(function () {
+            displayStatus('Effect has been successfully added');
+        }, errorHandler);
+    }
+
+    function enumerateCameras() {
+        displayStatus("Enumerating capture devices");
+        var cameraSelect = id("cameraSelect");
+        cameraList = null;
+        cameraList = new Array();
+
+        // Clear the previous list of capture devices if any
+        while (cameraSelect.length > 0) {
+            cameraSelect.remove(0);
+        }
+
+        // Enumerate cameras and add them to the list
+        var deviceInfo = Windows.Devices.Enumeration.DeviceInformation;
+        deviceInfo.findAllAsync(Windows.Devices.Enumeration.DeviceClass.videoCapture).done(function (cameras) {
+            if (cameras.length === 0) {
+                cameraSelect.disabled = true;
+                displayError("No camera was found");
+                id("btnStartDevice").disabled = true;
+                cameraSelect.add(new Option("No cameras available"));
+            } else {
+                cameras.forEach(function (camera) {
+                    cameraList.push(camera);
+                    cameraSelect.add(new Option(camera.name));
+                });
+            }
+        }, errorHandler);
+    }
+
+    function onDeviceChange() {
+        releaseMediaCapture();
+        id("btnStartDevice").disabled = false;
+        id("btnStartPreview").disabled = true;
+        id("videoEffect").disabled = true;
+        displayStatus("");
+    }
+
+    function suspendingHandler(suspendArg) {
+        displayStatus("Suspended");
+        releaseMediaCapture();
+    }
+
+    function resumingHandler(resumeArg) {
+        displayStatus("Resumed");
+        scenarioInitialize();
+    }
+
+    function errorHandler(err) {
+        displayError(err.message);
+    }
+
+    function failedEventHandler(e) {
+        displayError("Fatal error", e.message);
+    }
+
+    function displayStatus(statusText) {
+        SdkSample.displayStatus(statusText);
+    }
+
+    function displayError(error) {
+        SdkSample.displayError(error);
+    }
+
+    function id(elementId) {
+        return document.getElementById(elementId);
+    }
+})();
diff --git a/samples/winrt/JavaScript/js/default.js b/samples/winrt/JavaScript/js/default.js
new file mode 100644
index 000000000..4aa17a3d6
--- /dev/null
+++ b/samples/winrt/JavaScript/js/default.js
@@ -0,0 +1,74 @@
+﻿//// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+//// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+//// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+//// PARTICULAR PURPOSE.
+////
+//// Copyright (c) Microsoft Corporation. All rights reserved
+
+
+(function () {
+    "use strict";
+
+    var sampleTitle = "OpenCV Image Manipulations sample";
+
+    var scenarios = [
+        { url: "/html/AdvancedCapture.html", title: "Enumerate cameras and add a video effect" },
+    ];
+
+    function activated(eventObject) {
+        if (eventObject.detail.kind === Windows.ApplicationModel.Activation.ActivationKind.launch) {
+            // Use setPromise to indicate to the system that the splash screen must not be torn down
+            // until after processAll and navigate complete asynchronously.
+            eventObject.setPromise(WinJS.UI.processAll().then(function () {
+                // Navigate to either the first scenario or to the last running scenario
+                // before suspension or termination.
+                var url = WinJS.Application.sessionState.lastUrl || scenarios[0].url;
+                return WinJS.Navigation.navigate(url);
+            }));
+        }
+    }
+
+    WinJS.Navigation.addEventListener("navigated", function (eventObject) {
+        var url = eventObject.detail.location;
+        var host = document.getElementById("contentHost");
+        // Call unload method on current scenario, if there is one
+        host.winControl && host.winControl.unload && host.winControl.unload();
+        WinJS.Utilities.empty(host);
+        eventObject.detail.setPromise(WinJS.UI.Pages.render(url, host, eventObject.detail.state).then(function () {
+            WinJS.Application.sessionState.lastUrl = url;
+        }));
+    });
+
+    WinJS.Namespace.define("SdkSample", {
+        sampleTitle: sampleTitle,
+        scenarios: scenarios,
+        mediaCaptureMgr: null,
+        photoFile: "photo.jpg",
+        deviceList: null,
+        recordState: null,
+        captureInitSettings: null,
+        encodingProfile: null,
+        storageFile: null,
+        photoStorage: null,
+        cameraControlSliders: null,
+
+
+        displayStatus: function (statusText) {
+            WinJS.log && WinJS.log(statusText, "MediaCapture", "status");
+        },
+
+        displayError: function (error) {
+            WinJS.log && WinJS.log(error, "MediaCapture", "error");
+        },
+
+        id: function (elementId) {
+            return document.getElementById(elementId);
+        },
+
+    });
+
+    WinJS.Application.addEventListener("activated", activated, false);
+    WinJS.Application.start();
+    Windows.UI.WebUI.WebUIApplication.addEventListener("suspending", SdkSample.suspendingHandler, false);
+    Windows.UI.WebUI.WebUIApplication.addEventListener("resuming", SdkSample.resumingHandler, false);
+})();
diff --git a/samples/winrt/JavaScript/package.appxmanifest b/samples/winrt/JavaScript/package.appxmanifest
new file mode 100644
index 000000000..aa4526c2a
--- /dev/null
+++ b/samples/winrt/JavaScript/package.appxmanifest
@@ -0,0 +1,35 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Package xmlns="http://schemas.microsoft.com/appx/2010/manifest" xmlns:m2="http://schemas.microsoft.com/appx/2013/manifest">
+  <Identity Name="d70a3790-48ce-4e58-af60-ebefc22e9c7a" Version="1.0.0.0" Publisher="CN=Sergei" />
+  <Properties>
+    <DisplayName>MediaCaptureJavaScript</DisplayName>
+    <PublisherDisplayName>Sergei</PublisherDisplayName>
+    <Logo>images\storelogo.png</Logo>
+  </Properties>
+  <Prerequisites>
+    <OSMinVersion>6.3.0</OSMinVersion>
+    <OSMaxVersionTested>6.3.0</OSMaxVersionTested>
+  </Prerequisites>
+  <Resources>
+    <Resource Language="x-generate" />
+  </Resources>
+  <Applications>
+    <Application Id="App" StartPage="default.html">
+      <m2:VisualElements DisplayName="MediaCaptureJavaScript" Description="MediaCaptureJavaScript" ForegroundText="light" BackgroundColor="#464646" Square150x150Logo="images\Logo.png" Square30x30Logo="images\SmallLogo.png">
+        <m2:SplashScreen Image="images\splashscreen.png" />
+      </m2:VisualElements>
+    </Application>
+  </Applications>
+  <Capabilities>
+    <Capability Name="internetClient" />
+    <DeviceCapability Name="webcam" />
+  </Capabilities>
+  <Extensions>
+    <Extension Category="windows.activatableClass.inProcessServer">
+      <InProcessServer>
+        <Path>OcvTransform.dll</Path>
+        <ActivatableClass ActivatableClassId="OcvTransform.OcvImageManipulations" ThreadingModel="both" />
+      </InProcessServer>
+    </Extension>
+  </Extensions>
+</Package>
\ No newline at end of file
diff --git a/samples/winrt/JavaScript/sample-utils/sample-utils.css b/samples/winrt/JavaScript/sample-utils/sample-utils.css
new file mode 100644
index 000000000..d209fafbb
--- /dev/null
+++ b/samples/winrt/JavaScript/sample-utils/sample-utils.css
@@ -0,0 +1,213 @@
+﻿/* Copyright (c) Microsoft Corporation. All rights reserved. */
+html
+{
+    cursor: default;
+}
+
+#featureLabel
+{
+    font: 20pt/24pt "Segoe UI Semilight";
+    margin:0;
+    padding:5px 0 10px 0;
+    font-weight: normal;
+}
+
+#inputLabel, #outputLabel
+{
+    font: 11pt/15pt "Segoe UI";
+    margin:0;
+    padding:0;
+    font-weight: normal;
+}
+
+#listLabel, #descLabel
+{
+    font: 11pt/15pt "Segoe UI Semilight";
+    font-weight:normal;
+}
+
+#rootGrid
+{
+    width: 100%;
+    height: 100%;
+    display: -ms-grid;
+    -ms-grid-columns: 100px 1fr 100px;
+    -ms-grid-rows: 20px auto 1fr auto 20px;
+}
+
+#header
+{
+    -ms-grid-column: 2;
+    -ms-grid-row: 2;
+}
+
+#content
+{
+    padding-right:20px;
+    padding-bottom:20px;
+    overflow:auto;
+    display:-ms-grid;
+    -ms-grid-columns:1fr;
+    -ms-grid-rows: auto 1fr;
+    -ms-grid-column: 2;
+    -ms-grid-row: 3;
+}
+
+#footer
+{
+    -ms-grid-column: 2;
+    -ms-grid-row: 4;
+    padding-bottom:10px;
+}
+
+#featureLabel
+{
+    -ms-grid-row: 1;
+}
+
+#contentHost
+{
+    display:-ms-grid;
+    -ms-grid-columns:1fr;
+    -ms-grid-rows: auto auto auto 1fr;
+    -ms-grid-row: 2;
+}
+
+#inputLabel
+{
+    -ms-grid-row: 1;
+}
+
+
+#input
+{
+    -ms-grid-row: 2;
+    display: -ms-grid;
+    -ms-grid-columns: auto auto;
+    -ms-grid-rows: auto;
+    margin-top:10px;
+}
+
+#outputLabel
+{
+    -ms-grid-row: 3;
+    padding-top:10px;
+    padding-bottom:10px;
+}
+
+#output
+{
+    height:100%;
+    -ms-grid-row: 4;
+    -ms-grid-row-align:stretch;
+}
+
+.clear
+{
+    clear:both;
+}
+
+
+#footer span
+{
+    font-size:12px;
+}
+
+#footer .company
+{
+    float:left;
+}
+
+#footer .links
+{
+    float:right;
+}
+
+#footer .links a
+{
+    font-size:12px;
+    margin-left:8px;
+    text-decoration:none;
+}
+
+#footer .links .pipe
+{
+    font-size:9px;
+    margin-left:8px;
+}
+
+#statusMessage
+{
+    margin-bottom:5px;
+}
+
+#input .options
+{
+    -ms-grid-row: 1;
+    -ms-grid-column: 1;
+}
+
+#input .details
+{
+    -ms-grid-row: 1;
+    -ms-grid-column: 2;
+    cursor:text;
+}
+
+.imageHolder
+{
+    max-width:382px;
+}
+
+.imageHolder.withText
+{
+     float:left;
+     margin-right:10px;
+}
+
+#scenarios
+{
+    margin-right:20px;
+}
+
+
+
+@media screen and (min-width: 800px) and (max-width: 1024px)
+{
+    #rootGrid
+    {
+        -ms-grid-columns: 40px 1fr 40px;
+    }
+}
+
+@media screen and (max-width: 799px)
+{
+    #rootGrid
+    {
+        -ms-grid-columns: 20px 1fr 20px;
+    }
+
+    #output
+    {
+        padding-bottom:20px;
+    }
+
+    #input
+    {
+        -ms-grid-columns: auto;
+        -ms-grid-rows: auto auto;
+    }
+
+    #input .options
+    {
+        -ms-grid-row: 1;
+        -ms-grid-column: 1;
+        margin-bottom:10px;
+    }
+
+    #input .details
+    {
+        -ms-grid-row: 2;
+        -ms-grid-column: 1;
+    }
+}
\ No newline at end of file
diff --git a/samples/winrt/JavaScript/sample-utils/sample-utils.js b/samples/winrt/JavaScript/sample-utils/sample-utils.js
new file mode 100644
index 000000000..ad2834d8c
--- /dev/null
+++ b/samples/winrt/JavaScript/sample-utils/sample-utils.js
@@ -0,0 +1,204 @@
+﻿//// Copyright (c) Microsoft Corporation. All rights reserved
+
+// This file is a part of the SDK sample framework. For code demonstrating scenarios in this particular sample,
+// please see the html, css and js folders.
+
+(function () {
+
+    //
+    // Helper controls used in the sample pages
+    //
+
+    // The ScenarioInput control inserts the appropriate markup to get labels & controls
+    // hooked into the input section of a scenario page so that it's not repeated in
+    // every one.
+
+    var lastError = "";
+    var lastStatus = "";
+    var ScenarioInput = WinJS.Class.define(
+        function (element, options) {
+        element.winControl = this;
+        this.element = element;
+        // Expose the host element as the "main" landmark, labelled by the heading added below.
+        new WinJS.Utilities.QueryCollection(element)
+                    .setAttribute("role", "main")
+                    .setAttribute("aria-labelledby", "inputLabel");
+        element.id = "input";
+        // Order matters: the existing children are wrapped first, then the picker goes in front of them.
+        this.addInputLabel(element);
+        this.addDetailsElement(element);
+        this.addScenariosPicker(element);
+    }, {
+        addInputLabel: function (element) {
+            var label = document.createElement("h2");
+            label.textContent = "Input";
+            label.id = "inputLabel";
+            element.parentNode.insertBefore(label, element);
+        },
+        addScenariosPicker: function (parentElement) {
+            var scenarios = document.createElement("div");
+            scenarios.id = "scenarios";
+            var control = new ScenarioSelect(scenarios);
+            // Make the picker the first child of the input element.
+            parentElement.insertBefore(scenarios, parentElement.childNodes[0]);
+        },
+
+        addDetailsElement: function (sourceElement) {
+            var detailsDiv = this._createDetailsDiv();
+            while (sourceElement.childNodes.length > 0) {
+                detailsDiv.appendChild(sourceElement.removeChild(sourceElement.childNodes[0]));
+            }
+            sourceElement.appendChild(detailsDiv);
+        },
+        _createDetailsDiv: function () {
+            var detailsDiv = document.createElement("div");
+            // Live region labelled by the "Description" heading created below.
+            new WinJS.Utilities.QueryCollection(detailsDiv)
+                        .addClass("details")
+                        .setAttribute("role", "region")
+                        .setAttribute("aria-labelledby", "descLabel")
+                        .setAttribute("aria-live", "assertive");
+
+            var label = document.createElement("h3");
+            label.textContent = "Description";
+            label.id = "descLabel";
+
+            detailsDiv.appendChild(label);
+            return detailsDiv;
+        },
+    }
+    );
+
+    // The ScenarioOutput control inserts the appropriate markup to get labels & controls
+    // hooked into the output section of a scenario page so that it's not repeated in
+    // every one.
+
+    var ScenarioOutput = WinJS.Class.define(
+        function (element, options) {
+        element.winControl = this;
+        this.element = element;
+        new WinJS.Utilities.QueryCollection(element)
+                    .setAttribute("role", "region")
+                    .setAttribute("aria-labelledby", "outputLabel")
+                    .setAttribute("aria-live", "assertive");
+        element.id = "output";
+        // Put the "Output" heading before the element and the status line inside it as first child.
+        this._addOutputLabel(element);
+        this._addStatusOutput(element);
+    }, {
+        _addOutputLabel: function (element) {
+            var label = document.createElement("h2");
+            label.id = "outputLabel";
+            label.textContent = "Output";
+            element.parentNode.insertBefore(label, element);
+        },
+        _addStatusOutput: function (element) {
+            var statusDiv = document.createElement("div");
+            statusDiv.id = "statusMessage";
+            statusDiv.setAttribute("role", "textbox");
+            element.insertBefore(statusDiv, element.childNodes[0]);
+        }
+    }
+    );
+
+
+    // Sample infrastructure internals
+
+    var currentScenarioUrl = null;
+
+    WinJS.Navigation.addEventListener("navigating", function (evt) {
+        currentScenarioUrl = evt.detail.location;
+    });
+
+    WinJS.log = function (message, tag, type) {
+        var isError = (type === "error");
+        var isStatus = (type === "status");
+
+        if (isError || isStatus) {
+            var statusDiv = /* @type(HTMLElement) */ document.getElementById("statusMessage");
+            if (statusDiv) {
+                statusDiv.innerText = message;
+                if (isError) {
+                    lastError = message;
+                    statusDiv.style.color = "blue";
+                } else if (isStatus) {
+                    lastStatus = message;
+                    statusDiv.style.color = "green";
+                }
+            }
+        }
+    };
+
+    // Control that populates and runs the scenario selector
+
+    var ScenarioSelect = WinJS.UI.Pages.define("/sample-utils/scenario-select.html", {
+        ready: function (element, options) {
+            var that = this;
+            var selectElement = WinJS.Utilities.query("#scenarioSelect", element);
+            this._selectElement = selectElement[0];
+            // Add one <option> per entry of SdkSample.scenarios.
+            SdkSample.scenarios.forEach(function (s, index) {
+                that._addScenario(index, s);
+            });
+            // Navigate to the chosen scenario's url whenever the selection changes.
+            selectElement.listen("change", function (evt) {
+                var select = evt.target;
+                if (select.selectedIndex >= 0) {
+                    var newUrl = select.options[select.selectedIndex].value;
+                    WinJS.Navigation.navigate(newUrl);
+                }
+            });
+            selectElement[0].size = (SdkSample.scenarios.length > 5 ? 5 : SdkSample.scenarios.length);
+            if (SdkSample.scenarios.length === 1) {
+                // Avoid showing down arrow when there is only one scenario
+                selectElement[0].setAttribute("multiple", "multiple");
+            }
+
+            // Use setImmediate to ensure that the select element is set as active only after
+            // the scenario page has been constructed.
+            setImmediate(function () {
+                that._selectElement.setActive();
+            });
+        },
+
+        _addScenario: function (index, info) {
+            var option = document.createElement("option");
+            if (info.url === currentScenarioUrl) {
+                option.selected = "selected";
+            }
+            option.text = (index + 1) + ") " + info.title;
+            option.value = info.url;
+            this._selectElement.appendChild(option);
+        }
+    });
+
+    function activated(e) {
+        WinJS.Utilities.query("#featureLabel")[0].textContent = SdkSample.sampleTitle;
+    }
+
+    WinJS.Application.addEventListener("activated", activated, false);
+
+    // Export public methods & controls
+    WinJS.Namespace.define("SdkSample", {
+        ScenarioInput: ScenarioInput,
+        ScenarioOutput: ScenarioOutput
+    });
+
+    // SDK Sample Test helper
+    document.TestSdkSample = {
+        getLastError: function () {
+            return lastError;
+        },
+
+        getLastStatus: function () {
+            return lastStatus;
+        },
+
+        selectScenario: function (scenarioID) {
+            scenarioID = scenarioID >> 0;
+            var select = document.getElementById("scenarioSelect");
+            var newUrl = select.options[scenarioID - 1].value;
+            WinJS.Navigation.navigate(newUrl);
+        }
+    };
+})();
diff --git a/samples/winrt/JavaScript/sample-utils/scenario-select.html b/samples/winrt/JavaScript/sample-utils/scenario-select.html
new file mode 100644
index 000000000..011fabee6
--- /dev/null
+++ b/samples/winrt/JavaScript/sample-utils/scenario-select.html
@@ -0,0 +1,15 @@
+﻿<!-- Copyright (c) Microsoft Corporation. All rights reserved. -->
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <title></title>
+    </head>
+    <body>
+        <div class="options">
+             <h3 id="listLabel">Select scenario:</h3>
+            <select id="scenarioSelect" aria-labelledby="listLabel">
+                <!-- scenario list is inserted here -->
+            </select>
+        </div>
+    </body>
+</html>
diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.cpp b/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.cpp
index 2e91eb156..fc7440fb2 100644
--- a/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.cpp
+++ b/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.cpp
@@ -10,6 +10,10 @@
 #include <Robuffer.h>
 #include <vector>
 #include <opencv2\imgproc\types_c.h>
+#include <opencv2\imgcodecs\imgcodecs.hpp>
+#include <opencv2\core\core.hpp>
+
+#include <windows.storage.h>
 
 using namespace OcvImageProcessing;
 
@@ -18,6 +22,7 @@ using namespace concurrency;
 using namespace Platform;
 using namespace Windows::Foundation;
 using namespace Windows::Storage::Streams;
+using namespace Windows::Storage;
 using namespace Windows::UI::Xaml::Media::Imaging;
 using namespace Windows::Graphics::Imaging;
 using namespace Windows::Foundation::Collections;
@@ -37,6 +42,17 @@ MainPage::MainPage()
 {
     InitializeComponent();
 
+#ifdef __OPENCV_IMGCODECS_HPP__
+
+    // Image loading OpenCV way ... way more simple
+    cv::Mat image = cv::imread("Assets/Lena.png");
+    Lena = cv::Mat(image.rows, image.cols, CV_8UC4);
+    cvtColor(image, Lena, CV_BGR2BGRA);
+    UpdateImage(Lena);
+
+#else
+
+    // Image loading WinRT way
     RandomAccessStreamReference^ streamRef = RandomAccessStreamReference::CreateFromUri(InputImageUri);
 
     task<IRandomAccessStreamWithContentType^> (streamRef->OpenReadAsync()).
@@ -68,6 +84,67 @@ MainPage::MainPage()
         memcpy(Lena.data, srcPixels->Data, 4*frameWidth*frameHeight);
         UpdateImage(Lena);
     });
+
+#endif
+}
+
+/// <summary>
+/// Temporary file creation example: returns the path that cv::tempfile produces for the given suffix.
+/// On WinRT this is usually "C:\Users\{username}\AppData\Local\Packages\{package_id}\TempState\{random_name}.{suffix}"
+/// </summary>
+/// <param name="suffix">Temporary file suffix, e.g. "tmp"</param>
+std::string OcvImageProcessing::MainPage::CreateTempFile(const std::string &suffix) {
+    return cv::tempfile(suffix.c_str());
+}
+
+/// <summary>
+/// Creating/writing a file in the application local directory
+/// </summary>
+/// <param name="image">Image to save</param>
+/// <returns>Result of cv::imwrite for "Lena.png" inside the application local folder</returns>
+bool OcvImageProcessing::MainPage::SaveImage(cv::Mat image) {
+    StorageFolder^ localFolderRT = ApplicationData::Current->LocalFolder;
+    cv::String localFile = ConvertPath(localFolderRT->Path) + "\\Lena.png";
+    return cv::imwrite(localFile, image);
+}
+
+/// <summary>
+/// Converts a Platform::String path to a narrow cv::String via std::wstring.
+/// Three possible approaches are listed in the body; the third one is used.
+/// Can't use this one: https://msdn.microsoft.com/en-us/library/bb384865.aspx, not available on WinRT.
+/// </summary>
+/// <param name="path">Path to be converted</param>
+/// <returns>Narrow-character copy of the path (empty if the conversion fails)</returns>
+cv::String OcvImageProcessing::MainPage::ConvertPath(Platform::String^ path) {
+    std::wstring localPathW(path->Begin());
+
+    // Opt #1 (only correct for ASCII): std::string localPath(localPathW.begin(), localPathW.end());
+    // Opt #2 (UTF-8 via WideCharToMultiByte): std::string localPath(StrToWStr(localPathW));
+
+    // Opt #3: wcstombs_s with the current C locale. One wide character can expand to
+    // MB_CUR_MAX bytes, so size the buffer for the worst case plus the terminator;
+    // otherwise non-ASCII paths would be cut short.
+    const size_t outSize = localPathW.length() * static_cast<size_t>(MB_CUR_MAX) + 1;
+    // std::vector owns the buffer, so it is released automatically on return.
+    std::vector<char> localPathC(outSize, '\0');
+    size_t charsConverted = 0;
+    wcstombs_s(&charsConverted, &localPathC[0], outSize, localPathW.c_str(), outSize - 1);
+
+    // Per the wcstombs_s documentation a failed conversion leaves an empty string in the buffer.
+    return cv::String(&localPathC[0]);
+}
+
+std::string OcvImageProcessing::MainPage::StrToWStr(const std::wstring &input) {
+    if (input.empty()) {
+        return std::string();
+    }
+    // NOTE: despite its name, this converts a wide string to a narrow UTF-8 (CP_UTF8) std::string.
+    int size = WideCharToMultiByte(CP_UTF8, 0, &input[0], (int)input.size(), NULL, 0, NULL, NULL);
+    std::string result(size, 0);
+    // The first call (NULL output, size 0) only measured the byte count; this call fills the buffer.
+    WideCharToMultiByte(CP_UTF8, 0, &input[0], (int)input.size(), &result[0], size, NULL, NULL);
+    // An explicit input length is passed, so no terminating NUL is written into result.
+    return result;
 }
 
 /// <summary>
@@ -91,15 +168,16 @@ void OcvImageProcessing::MainPage::UpdateImage(const cv::Mat& image)
 
     // Obtain IBufferByteAccess
     ComPtr<IBufferByteAccess> pBufferByteAccess;
-    ComPtr<IUnknown> pBuffer((IUnknown*)buffer);
+    ComPtr<IInspectable> pBuffer((IInspectable*)buffer);
     pBuffer.As(&pBufferByteAccess);
 
     // Get pointer to pixel bytes
     pBufferByteAccess->Buffer(&dstPixels);
-    memcpy(dstPixels, image.data, 4*image.cols*image.rows);
+    memcpy(dstPixels, image.data, image.step.buf[1]*image.cols*image.rows);
 
     // Set the bitmap to the Image element
-    PreviewWidget->Source = bitmap;}
+    PreviewWidget->Source = bitmap;
+}
 
 
 cv::Mat OcvImageProcessing::MainPage::ApplyGrayFilter(const cv::Mat& image)
diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.h b/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.h
index 79c1ac74c..bb7c4c33d 100644
--- a/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.h
+++ b/samples/winrt/OcvImageProcessing/OcvImageProcessing/MainPage.xaml.h
@@ -39,6 +39,11 @@ namespace OcvImageProcessing
         cv::Mat ApplySepiaFilter(const cv::Mat& image);
 
         void UpdateImage(const cv::Mat& image);
+        std::string CreateTempFile(const std::string &suffix);
+        bool SaveImage(cv::Mat image);
+
+        std::string StrToWStr(const std::wstring &wstr);
+        cv::String ConvertPath(Platform::String^ path);
 
         cv::Mat Lena;
         unsigned int frameWidth, frameHeight;
diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/opencv.props b/samples/winrt/OcvImageProcessing/OcvImageProcessing/opencv.props
index 40eaffd1f..64b0ac98a 100644
--- a/samples/winrt/OcvImageProcessing/OcvImageProcessing/opencv.props
+++ b/samples/winrt/OcvImageProcessing/OcvImageProcessing/opencv.props
@@ -17,6 +17,9 @@
     <None Include="$(OpenCV_Bin)opencv_imgproc300$(DebugSuffix).dll">
       <DeploymentContent>true</DeploymentContent>
     </None>
+    <None Include="$(OpenCV_Bin)opencv_imgcodecs300$(DebugSuffix).dll">
+      <DeploymentContent>true</DeploymentContent>
+    </None>
     <None Include="$(OpenCV_Bin)opencv_features2d300$(DebugSuffix).dll">
       <DeploymentContent>true</DeploymentContent>
     </None>
@@ -33,7 +36,7 @@
     </ClCompile>
     <Link>
       <!--Add required OpenCV libs here-->
-      <AdditionalDependencies>opencv_core300$(DebugSuffix).lib;opencv_imgproc300$(DebugSuffix).lib;opencv_features2d300$(DebugSuffix).lib;opencv_flann300$(DebugSuffix).lib;opencv_ml300$(DebugSuffix).lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>opencv_core300$(DebugSuffix).lib;opencv_imgproc300$(DebugSuffix).lib;opencv_features2d300$(DebugSuffix).lib;opencv_flann300$(DebugSuffix).lib;opencv_ml300$(DebugSuffix).lib;opencv_imgcodecs300$(DebugSuffix).lib;%(AdditionalDependencies)</AdditionalDependencies>
       <AdditionalLibraryDirectories>$(OpenCV_Lib);%(AdditionalLibraryDirectories);</AdditionalLibraryDirectories>
     </Link>
   </ItemDefinitionGroup>